[OP] Add remainder_grad for remainder operator #68961

Merged
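For reference: `remainder` follows the floor-mod convention, `out = x - floor(x / y) * y`. Treating `floor(x / y)` as locally constant, the backward rule implemented in this PR is `x_grad = out_grad` and `y_grad = -out_grad * floor(x / y)`. A short derivation in our own notation (not copied from the PR):

```latex
\mathrm{out} = x - \left\lfloor \frac{x}{y} \right\rfloor y
\quad\Longrightarrow\quad
\frac{\partial\,\mathrm{out}}{\partial x} = 1,
\qquad
\frac{\partial\,\mathrm{out}}{\partial y} = -\left\lfloor \frac{x}{y} \right\rfloor
```

By the chain rule, `dx = dout` and `dy = -dout * floor(x / y)`, which is what the functors added in `elementwise_functor.h` and `elementwise_grad_kernel_impl.h` compute.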
33 changes: 33 additions & 0 deletions paddle/phi/kernels/cpu/elementwise_grad_kernel.cc
@@ -49,6 +49,29 @@ void MinimumGradKernel(const Context& dev_ctx,
dev_ctx, x, y, dout, dout, axis, dx, dy, MinGradDx<T>(), MinGradDy<T>());
}

template <typename T, typename Context>
void RemainderGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
int axis = -1;
phi::funcs::
ElemwiseGradCompute<Context, T, RemainderGradDx<T>, RemainderGradDy<T>>(
dev_ctx,
x,
y,
dout,
dout,
axis,
dx,
dy,
RemainderGradDx<T>(),
RemainderGradDy<T>());
}

template <typename T, typename Context>
void CopySignGradKernel(const Context& dev_ctx,
const DenseTensor& x,
@@ -111,6 +134,16 @@ PD_REGISTER_KERNEL(minimum_grad,
int64_t,
phi::dtype::bfloat16) {}

PD_REGISTER_KERNEL(remainder_grad,
CPU,
ALL_LAYOUT,
phi::RemainderGradKernel,
float,
double,
int,
int64_t,
phi::dtype::bfloat16) {}

PD_REGISTER_KERNEL(heaviside_grad,
CPU,
ALL_LAYOUT,
8 changes: 8 additions & 0 deletions paddle/phi/kernels/elementwise_grad_kernel.h
@@ -51,6 +51,14 @@ void MinimumGradKernel(const Context& dev_ctx,
DenseTensor* dx,
DenseTensor* dy);

template <typename T, typename Context>
void RemainderGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
DenseTensor* dx,
DenseTensor* dy);

template <typename T, typename Context>
void HeavisideGradKernel(const Context& dev_ctx,
const DenseTensor& x,
90 changes: 90 additions & 0 deletions paddle/phi/kernels/funcs/elementwise_functor.h
@@ -591,6 +591,96 @@ struct RemainderFunctor<dtype::bfloat16> {
}
};

// RemainderGradXFunctor
template <typename T>
struct RemainderGradXFunctor {
inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
// dx = dout
return dout;
}
};

// RemainderGradYFunctor
template <typename T, typename Enable = void>
struct RemainderGradYFunctor {
inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
// dy = -dout * (floor_div(x, y))
return -dout * static_cast<T>((std::floor(x / y)));
}
};
template <typename T>
struct RemainderGradYFunctor<
T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
// dy = -dout * (floor_div(x, y))
auto x_ = static_cast<MPType>(x);
auto y_ = static_cast<MPType>(y);
return static_cast<T>(-static_cast<MPType>(dout) * (std::floor((x_ / y_))));
}
};
template <typename T>
struct RemainderGradYFunctor<
T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
// dy = -dout * (x / y)
return -dout * (x / y);
}
};

// RemainderGradXYFunctor
template <typename InT, typename OutT, typename Enable = void>
struct RemainderGradXYFunctor {
inline HOSTDEVICE phi::Array<OutT, 2> operator()(const InT x,
const InT y,
const InT dout) {
phi::Array<OutT, 2> outs;
// dx = dout
outs[0] = static_cast<OutT>(dout);
// dy = -dout * (floor_div(x, y))
outs[1] = static_cast<OutT>(-dout * static_cast<InT>(std::floor(x / y)));
return outs;
}
};
template <typename InT, typename OutT>
struct RemainderGradXYFunctor<
InT,
OutT,
typename std::enable_if<std::is_floating_point<InT>::value>::type> {
inline HOSTDEVICE Array<OutT, 2> operator()(const InT x,
const InT y,
const InT dout) {
Array<OutT, 2> outs;
// dx = dout
outs[0] = static_cast<OutT>(dout);
// dy = -dout * floor(x / y)
using MPType = typename phi::dtype::MPTypeTrait<InT>::Type;
auto x_ = static_cast<MPType>(x);
auto y_ = static_cast<MPType>(y);
outs[1] =
static_cast<OutT>(static_cast<MPType>(-dout) * std::floor(x_ / y_));
return outs;
}
};
template <typename InT, typename OutT>
struct RemainderGradXYFunctor<
InT,
OutT,
typename std::enable_if<std::is_integral<InT>::value>::type> {
inline HOSTDEVICE Array<OutT, 2> operator()(const InT x,
const InT y,
const InT dout) {
Array<OutT, 2> outs;
// dx = dout
outs[0] = static_cast<OutT>(dout);
// dy = -dout * (x / y)
outs[1] = static_cast<OutT>(-dout * (x / y));
return outs;
}
};

template <typename T, typename Enable = void>
struct InverseRemainderFunctor {
inline HOSTDEVICE T operator()(const T a, const T b) const {
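A standalone sketch (not part of the PR) mirroring `RemainderGradXFunctor` / `RemainderGradYFunctor` for plain `float`, useful for sanity-checking the rule above; the helper names are ours:

```cpp
// Sketch only: plain-float mirror of the remainder backward functors.
#include <cmath>
#include <cstdio>

// dx = dout
float remainder_grad_dx(float /*x*/, float /*y*/, float dout) { return dout; }

// dy = -dout * floor(x / y)
float remainder_grad_dy(float x, float y, float dout) {
  return -dout * std::floor(x / y);
}

int main() {
  // x = 7, y = 3: out = 7 - floor(7 / 3) * 3 = 1
  // With dout = 1: dx = 1, dy = -floor(7 / 3) = -2
  std::printf("dx = %g, dy = %g\n",
              remainder_grad_dx(7.0f, 3.0f, 1.0f),
              remainder_grad_dy(7.0f, 3.0f, 1.0f));
  return 0;
}
```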
41 changes: 41 additions & 0 deletions paddle/phi/kernels/gpu/elementwise_grad_kernel.cu
@@ -210,6 +210,36 @@ void MinimumGradKernel(const Context& dev_ctx,
}
}

template <typename T, typename Context>
void RemainderGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
DenseTensor* dx,
DenseTensor* dy) {
const auto place = dev_ctx.GetPlace();
int axis = -1;
if (dx != nullptr && dy != nullptr) {
std::vector<const DenseTensor*> ins = {&x, &y, &dout};
GetGradXAndYOut<T>(dev_ctx,
place,
axis,
ins,
dout,
dx,
dy,
funcs::RemainderGradXYFunctor<T, T>());
} else if (dx != nullptr && dy == nullptr) {
std::vector<const DenseTensor*> ins = {&x, &y, &dout};
GetGradXOrYOut<T>(
dev_ctx, place, axis, ins, dout, dx, funcs::RemainderGradXFunctor<T>());
} else if (dy != nullptr && dx == nullptr) {
std::vector<const DenseTensor*> ins = {&x, &y, &dout};
GetGradXOrYOut<T>(
dev_ctx, place, axis, ins, dout, dy, funcs::RemainderGradYFunctor<T>());
}
}

template <typename T, typename Context>
void CopySignGradKernel(const Context& dev_ctx,
const DenseTensor& x,
@@ -295,6 +325,17 @@ PD_REGISTER_KERNEL(minimum_grad,
phi::dtype::float16,
phi::dtype::bfloat16) {}

PD_REGISTER_KERNEL(remainder_grad,
GPU,
ALL_LAYOUT,
phi::RemainderGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}

PD_REGISTER_KERNEL(heaviside_grad,
GPU,
ALL_LAYOUT,
42 changes: 42 additions & 0 deletions paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h
@@ -1400,6 +1400,48 @@ void ElementwisePowGradKernel(const Context& dev_ctx,
dev_ctx, x, y, dout, dout, axis, dx, dy, PowGradDX<T>(), PowGradDY<T>());
}

/*
******************************
Remainder Grad
******************************
*/
// RemainderGradDx
template <typename T>
struct RemainderGradDx {
HOSTDEVICE T operator()(T x, T y, T out UNUSED, T dout) const {
// dx = dout
return dout;
}
};

// RemainderGradDy
template <typename T, typename Enable = void>
struct RemainderGradDy {
HOSTDEVICE T operator()(T x, T y, T out UNUSED, T dout) const {
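// dy = -dout * floor(x / y)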
return -dout * (std::floor(static_cast<double>(x / y)));
}
};
template <typename T>
struct RemainderGradDy<
T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
HOSTDEVICE T operator()(T x, T y, T out UNUSED, T dout) const {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
auto x_ = static_cast<MPType>(x);
auto y_ = static_cast<MPType>(y);
return static_cast<T>(-static_cast<MPType>(dout) * (std::floor((x_ / y_))));
}
};
template <typename T>
struct RemainderGradDy<
T,
typename std::enable_if<std::is_integral<T>::value>::type> {
HOSTDEVICE T operator()(T x, T y, T out UNUSED, T dout) const {
// dy = -dout * floor(x / y)
return -dout * static_cast<T>(std::floor(static_cast<double>(x) /
static_cast<double>(y)));
}
};
/*
******************************
Copysign Grad
10 changes: 10 additions & 0 deletions paddle/phi/ops/yaml/inconsistent/dygraph_backward.yaml
@@ -266,6 +266,16 @@
func : multiply_triple_grad
optional : fwd_grad_grad_x, fwd_grad_grad_y, grad_x_grad, grad_y_grad, grad_grad_out_grad

- backward_op : remainder_grad
forward : remainder (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, y]
kernel :
func : remainder_grad

- backward_op : set_value_grad
forward : set_value (Tensor x, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes, int64_t[] shape, Scalar[] values) -> Tensor(out)
args : (Tensor out_grad, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes)
3 changes: 2 additions & 1 deletion paddle/phi/ops/yaml/inconsistent/dygraph_ops.yaml
@@ -295,10 +295,11 @@
output : Tensor (out)
infer_meta :
func : ElementwiseInferMeta
param: [x, y]
kernel :
func : remainder
inplace : (x -> out)
traits : paddle::dialect::ForwardOnlyTrait
backward: remainder_grad

- op : set_value
args : (Tensor x, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes, int64_t[] shape, Scalar[] values)
10 changes: 10 additions & 0 deletions paddle/phi/ops/yaml/inconsistent/static_backward.yaml
@@ -472,6 +472,16 @@
data_type : out_grad_in
inplace : (out_grad_in -> out_grad_out)

- backward_op : remainder_grad
forward : remainder (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, y]
kernel :
func : remainder_grad

- backward_op : row_conv_grad
forward: row_conv (Tensor x, Tensor filter) -> Tensor(out)
args: (Tensor x, Tensor filter, Tensor out_grad)
3 changes: 2 additions & 1 deletion paddle/phi/ops/yaml/inconsistent/static_ops.yaml
@@ -779,13 +779,14 @@
output : Tensor (out)
infer_meta :
func : ElementwiseInferMeta
param: [x, y]
kernel :
func : remainder
data_transform :
support_trans_dtype : x, y
inplace : (x -> out)
interfaces : paddle::dialect::InferSymbolicShapeInterface
traits : paddle::dialect::ForwardOnlyTrait
backward: remainder_grad

- op : row_conv
args : (Tensor x, Tensor filter)
10 changes: 10 additions & 0 deletions paddle/phi/ops/yaml/legacy/static_backward.yaml
@@ -380,6 +380,16 @@
func : prod_grad
composite: prod_grad(x, out, out_grad, axis, keepdim, reduce_all, x_grad)

- backward_op : remainder_grad
forward : remainder (Tensor x, Tensor y, int axis = -1) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, y]
kernel :
func : remainder_grad

- backward_op : rnn_grad
forward : rnn (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, float dropout_prob=0.0, bool is_bidirec=false, int input_size=10, int hidden_size=100, int num_layers=1, str mode="RNN_TANH", int seed=0, bool is_test=false) -> Tensor(out), Tensor(dropout_state_out), Tensor[](state), Tensor(reserve)
args : (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, Tensor out, Tensor dropout_state_out, Tensor reserve, Tensor out_grad, Tensor[] state_grad, float dropout_prob, bool is_bidirec, int input_size, int hidden_size, int num_layers, str mode, int seed, bool is_test)
3 changes: 2 additions & 1 deletion paddle/phi/ops/yaml/legacy/static_ops.yaml
@@ -739,10 +739,11 @@
output : Tensor (out)
infer_meta :
func : ElementwiseRawInferMeta
param: [x, y]
kernel :
func : remainder
inplace : (x -> out)
traits : paddle::dialect::ForwardOnlyTrait
backward: remainder_grad

- op : rnn
args: (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, float dropout_prob=0.0, bool is_bidirec=false, int input_size=10, int hidden_size=100, int num_layers=1, str mode="RNN_TANH", int seed=0, bool is_test=false)
1 change: 0 additions & 1 deletion paddle/phi/ops/yaml/ops.yaml
@@ -3513,7 +3513,6 @@
data_type : x
traits : paddle::dialect::ForwardOnlyTrait
interfaces : paddle::dialect::InferSymbolicShapeInterface
traits : paddle::dialect::ForwardOnlyTrait

- op : multiplex
args : (Tensor[] inputs, Tensor index)