 #include <ATen/SparseTensorUtils.h>
 #include <ATen/Parallel.h>
 #include <ATen/NamedTensorUtils.h>
+#include <ATen/native/sparse/ParamUtils.h>
 #include <map>

 namespace at {
@@ -291,10 +292,10 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di
   if (dim >= sparse_dim) {
     if (LogSoftMax) {
       auto new_values = log_softmax_cpu(values, dim - sparse_dim + 1, false);
-      out_values.copy_(new_values);
+      out_values.set_(new_values);
     } else {
       auto new_values = softmax_cpu(values, dim - sparse_dim + 1, false);
-      out_values.copy_(new_values);
+      out_values.set_(new_values);
     }
     return;
   }
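For reference, the switch from copy_() to set_() above changes how the result reaches out_values: copy_() copies the computed values element-wise into out_values' existing storage, while set_() rebinds out_values to the result tensor's storage, presumably to avoid materializing a separate, pre-sized buffer. A minimal sketch of the distinction on dense tensors (illustrative only; the tensor names here are made up):

#include <ATen/ATen.h>

// Illustrative sketch of copy_() vs. set_() on dense tensors.
void copy_vs_set_sketch() {
  at::Tensor dst = at::zeros({4});
  at::Tensor src = at::arange(4, at::kFloat);

  dst.copy_(src);   // element-wise copy; dst keeps its own storage
  dst.set_(src);    // dst now aliases src's storage (no data copied)
}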
@@ -411,17 +412,27 @@ void cpu_sparse_coo_softmax_backward(Tensor& grad_input, const Tensor& grad, con
   auto grad_offsets = get_offsets(grad_indices, sizes, -1);

   if (dim >= sparse_dim) {
-    for (int64_t i=0; i<out_nnz; i++) {
-      Tensor unused;
-      auto low = std::lower_bound(grad_offsets.begin(), grad_offsets.end(), out_offsets[i]);
-      auto j = low - grad_offsets.begin();
-      if (j < grad_nnz && out_offsets[i] == grad_offsets[j]) {
-        if (LogSoftMax) {
-          auto r = log_softmax_backward_cpu(grad_values[j], out_values[i], dim - sparse_dim, unused);
-          values[i].copy_(r);
-        } else {
-          auto r = softmax_backward_cpu(grad_values[j], out_values[i], dim - sparse_dim, unused);
-          values[i].copy_(r);
+    Tensor unused;
+    if (out_offsets == grad_offsets) {
+      if (LogSoftMax) {
+        auto r = log_softmax_backward_cpu(grad_values, out_values, dim - sparse_dim + 1, unused);
+        values.set_(r);
+      } else {
+        auto r = softmax_backward_cpu(grad_values, out_values, dim - sparse_dim + 1, unused);
+        values.set_(r);
+      }
+    } else {
+      for (int64_t i=0; i<out_nnz; i++) {
+        auto low = std::lower_bound(grad_offsets.begin(), grad_offsets.end(), out_offsets[i]);
+        auto j = low - grad_offsets.begin();
+        if (j < grad_nnz && out_offsets[i] == grad_offsets[j]) {
+          if (LogSoftMax) {
+            auto r = log_softmax_backward_cpu(grad_values[j], out_values[i], dim - sparse_dim, unused);
+            values[i].copy_(r);
+          } else {
+            auto r = softmax_backward_cpu(grad_values[j], out_values[i], dim - sparse_dim, unused);
+            values[i].copy_(r);
+          }
         }
       }
     }
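The hunk above adds a fast path to the backward computation: when the gradient and the output have identical pooled offsets (the same sparsity pattern), the dense backward kernel runs once over the whole values tensor and the result is attached with set_(); only when the patterns differ does the original per-entry loop remain, matching each output offset against the gradient offsets with std::lower_bound. A self-contained sketch of that matching step, using plain std::vector stand-ins for the offset lists (illustrative only, not the ATen types used above):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Sorted pool offsets, as get_offsets would produce from coalesced indices.
  std::vector<int64_t> out_offsets  = {0, 2, 5, 7};
  std::vector<int64_t> grad_offsets = {2, 5, 9};
  int64_t grad_nnz = static_cast<int64_t>(grad_offsets.size());

  for (size_t i = 0; i < out_offsets.size(); i++) {
    auto low = std::lower_bound(grad_offsets.begin(), grad_offsets.end(), out_offsets[i]);
    auto j = low - grad_offsets.begin();
    if (j < grad_nnz && out_offsets[i] == grad_offsets[j]) {
      std::cout << "output entry " << i << " pairs with gradient entry " << j << "\n";
    } else {
      std::cout << "output entry " << i << " has no gradient entry; its grad stays zero\n";
    }
  }
  return 0;
}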
@@ -503,36 +514,36 @@ void cpu_sparse_coo_softmax_backward(Tensor& grad_input, const Tensor& grad, con
   });
 }

-} // namespace
+} // anonymous namespace

-Tensor softmax_sparse_cpu(const Tensor& input_, const int64_t dim_, const bool half_to_float) {
-  TORCH_INTERNAL_ASSERT(input_.is_sparse());
-  TORCH_CHECK(!half_to_float, "softmax with half to float conversion is not supported on CPU");
-  auto input = input_.coalesce();
-  Tensor output = at::native::empty_like(input);
+Tensor softmax_sparse_cpu(
+    const Tensor& input_,
+    const int64_t dim,
+    const bool half_to_float) {
+  Tensor input, output;
+  std::tie(input, output) = softmax_sparse_input_preprocessing(
+      input_, dim, half_to_float, "softmax");
   if (input.numel() == 0) {
     return output;
   }
-  TORCH_CHECK(dim_ >= 0 && dim_ < input.dim(),
-              "dim must be non-negative and less than input dimensions");
   AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "softmax", [&] {
-    cpu_sparse_coo_softmax<scalar_t, false>(output, input, dim_);
+    cpu_sparse_coo_softmax<scalar_t, false>(output, input, dim);
   });
   return output;
 }

-Tensor log_softmax_sparse_cpu(const Tensor& input_, const int64_t dim_, const bool half_to_float) {
-  TORCH_INTERNAL_ASSERT(input_.is_sparse());
-  TORCH_CHECK(!half_to_float, "log_softmax with half to float conversion is not supported on CPU");
-  auto input = input_.coalesce();
-  Tensor output = at::native::empty_like(input);
+Tensor log_softmax_sparse_cpu(
+    const Tensor& input_,
+    const int64_t dim,
+    const bool half_to_float) {
+  Tensor input, output;
+  std::tie(input, output) = softmax_sparse_input_preprocessing(
+      input_, dim, half_to_float, "log_softmax");
   if (input.numel() == 0) {
     return output;
   }
-  TORCH_CHECK(dim_ >= 0 && dim_ < input.dim(),
-              "dim must be non-negative and less than input dimensions");
   AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "log_softmax", [&] {
-    cpu_sparse_coo_softmax<scalar_t, true>(output, input, dim_);
+    cpu_sparse_coo_softmax<scalar_t, true>(output, input, dim);
   });
   return output;
 }
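Both forward entry points now delegate their boilerplate to softmax_sparse_input_preprocessing from the newly included ParamUtils.h. Its implementation is not part of this diff; judging only from the lines removed here, it plausibly bundles the sparse check, the half_to_float check, the dim range check, coalescing, and output allocation. A hypothetical sketch under that assumption (not the actual helper, whose signature and messages may differ):

// Hypothetical reconstruction, assembled from the checks the hunk above
// removes from softmax_sparse_cpu and log_softmax_sparse_cpu.
std::pair<Tensor, Tensor> softmax_sparse_input_preprocessing(
    const Tensor& input_,
    const int64_t dim,
    const bool half_to_float,
    const char* function_name) {
  TORCH_INTERNAL_ASSERT(input_.is_sparse());
  TORCH_CHECK(
      !half_to_float,
      function_name,
      " with half to float conversion is not supported on CPU");
  TORCH_CHECK(
      dim >= 0 && dim < input_.dim(),
      "dim must be non-negative and less than input dimensions");
  auto input = input_.coalesce();
  Tensor output = at::native::empty_like(input);
  return std::make_pair(input, output);
}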
@@ -542,26 +553,16 @@ Tensor softmax_backward_sparse_cpu(
     const Tensor& output_,
     int64_t dim_,
     const Tensor& input_) {
-  TensorArg grad_arg{grad_, "grad", 1}, output_arg{output_, "output", 2};
-  checkSameSize("softmax_backward", grad_arg, output_arg);
-
-  int64_t dim = maybe_wrap_dim(dim_, grad_.dim());
-
-  auto grad = grad_.coalesce();
-  auto output = output_.coalesce();
-
-  Tensor grad_input = at::native::empty_like(output);
+  Tensor grad_input, grad, output;
+  std::tie(grad_input, grad, output) =
+      softmax_backward_sparse_input_preprocessing(
+          grad_, output_, dim_, input_, "softmax_backward");
   if (output.numel() == 0) {
     return grad_input;
   }
-  TORCH_CHECK(
-      dim >= 0 && dim < grad.dim(),
-      "dim must be non-negative and less than input dimensions");
-  TORCH_CHECK(
-      grad.sparse_dim() == output.sparse_dim(),
-      "grad and output sparse dimensions must be equal");
   AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "softmax_backward", [&] {
-    cpu_sparse_coo_softmax_backward<scalar_t, false>(grad_input, grad, output, dim);
+    cpu_sparse_coo_softmax_backward<scalar_t, false>(
+        grad_input, grad, output, dim_);
   });
   return grad_input;
 }
@@ -571,26 +572,16 @@ Tensor log_softmax_backward_sparse_cpu(
     const Tensor& output_,
     int64_t dim_,
     const Tensor& input_) {
-  TensorArg grad_arg{grad_, "grad", 1}, output_arg{output_, "output", 2};
-  checkSameSize("log_softmax_backward", grad_arg, output_arg);
-
-  int64_t dim = maybe_wrap_dim(dim_, grad_.dim());
-
-  auto grad = grad_.coalesce();
-  auto output = output_.coalesce();
-
-  Tensor grad_input = at::native::empty_like(output);
+  Tensor grad_input, grad, output;
+  std::tie(grad_input, grad, output) =
+      softmax_backward_sparse_input_preprocessing(
+          grad_, output_, dim_, input_, "log_softmax_backward");
   if (output.numel() == 0) {
     return grad_input;
   }
-  TORCH_CHECK(
-      dim >= 0 && dim < grad.dim(),
-      "dim must be non-negative and less than input dimensions");
-  TORCH_CHECK(
-      grad.sparse_dim() == output.sparse_dim(),
-      "grad and output sparse dimensions must be equal");
-  AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "softmax_backward", [&] {
-    cpu_sparse_coo_softmax_backward<scalar_t, true>(grad_input, grad, output, dim);
+  AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "log_softmax_backward", [&] {
+    cpu_sparse_coo_softmax_backward<scalar_t, true>(
+        grad_input, grad, output, dim_);
   });
   return grad_input;
 }
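The backward entry points follow the same pattern, delegating to softmax_backward_sparse_input_preprocessing. That helper is also outside this diff; from the removed lines, a plausible shape is the following hypothetical sketch (the dim wrapping, the unused input_ parameter, and the exact error messages may differ in the real ParamUtils code):

// Hypothetical reconstruction assembled from the checks removed from the two
// backward functions above; the real helper is declared in
// ATen/native/sparse/ParamUtils.h and may differ in detail.
std::tuple<Tensor, Tensor, Tensor> softmax_backward_sparse_input_preprocessing(
    const Tensor& grad_,
    const Tensor& output_,
    int64_t dim_,
    const Tensor& input_,  // accepted for interface parity with the callers; unused here
    const char* function_name) {
  TensorArg grad_arg{grad_, "grad", 1}, output_arg{output_, "output", 2};
  checkSameSize(function_name, grad_arg, output_arg);

  int64_t dim = maybe_wrap_dim(dim_, grad_.dim());
  TORCH_CHECK(
      dim >= 0 && dim < grad_.dim(),
      "dim must be non-negative and less than input dimensions");

  auto grad = grad_.coalesce();
  auto output = output_.coalesce();
  Tensor grad_input = at::native::empty_like(output);

  TORCH_CHECK(
      grad.sparse_dim() == output.sparse_dim(),
      "grad and output sparse dimensions must be equal");
  return std::make_tuple(grad_input, grad, output);
}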