add Digamma op #10066

Merged
merged 32 commits into master from digamma_op_dev on Apr 12, 2023

Commits (32)
cb8ffea
digamma op dev
youxiudeshouyeren Apr 1, 2023
fec26e3
unittest
youxiudeshouyeren Apr 1, 2023
44e1340
Merge branch 'master' into digamma_op_dev
youxiudeshouyeren Apr 1, 2023
8aac0c7
refine
youxiudeshouyeren Apr 2, 2023
b1fe15b
tensor.digamma api
youxiudeshouyeren Apr 2, 2023
0e92082
flow.digamma api
youxiudeshouyeren Apr 2, 2023
a08eada
fix test
youxiudeshouyeren Apr 2, 2023
b60259f
unittest
youxiudeshouyeren Apr 2, 2023
e7a3e19
Merge branch 'digamma_op_dev' of github.com:youxiudeshouyeren/oneflow…
youxiudeshouyeren Apr 2, 2023
2e0248e
fmt
youxiudeshouyeren Apr 2, 2023
98ebc8b
auto fmt
youxiudeshouyeren Apr 2, 2023
d8e5b0f
add api psi
youxiudeshouyeren Apr 2, 2023
98cf0a7
docstr
youxiudeshouyeren Apr 2, 2023
b0702e2
fmt
youxiudeshouyeren Apr 2, 2023
c1759ec
fix docstr
youxiudeshouyeren Apr 2, 2023
4b41d24
refine
youxiudeshouyeren Apr 3, 2023
17fe31a
fmt
youxiudeshouyeren Apr 3, 2023
8b80a76
fmt
youxiudeshouyeren Apr 3, 2023
a86d041
Merge branch 'master' into digamma_op_dev
youxiudeshouyeren Apr 3, 2023
fe44442
Merge branch 'master' into digamma_op_dev
youxiudeshouyeren Apr 6, 2023
e126953
add references
youxiudeshouyeren Apr 7, 2023
e1105d9
Merge branch 'digamma_op_dev' of github.com:youxiudeshouyeren/oneflow…
youxiudeshouyeren Apr 7, 2023
809892d
fmt
youxiudeshouyeren Apr 7, 2023
7130878
fix build
youxiudeshouyeren Apr 7, 2023
d1f63ea
fmt
youxiudeshouyeren Apr 7, 2023
6af454e
Merge branch 'master' into digamma_op_dev
youxiudeshouyeren Apr 7, 2023
75ff719
Merge branch 'youxiudeshouyeren-digamma_op_dev'
youxiudeshouyeren Apr 8, 2023
6c0055f
Merge branch 'master' into digamma_op_dev
youxiudeshouyeren Apr 8, 2023
190aabb
fix
youxiudeshouyeren Apr 8, 2023
68681e2
Merge branch 'master' into digamma_op_dev
youxiudeshouyeren Apr 11, 2023
8edc5e8
Merge branch 'master' into digamma_op_dev
mergify[bot] Apr 11, 2023
3e32f4c
Merge branch 'master' into digamma_op_dev
mergify[bot] Apr 11, 2023
1 change: 1 addition & 0 deletions docs/source/special.rst
@@ -8,6 +8,7 @@ The oneflow.special module, modeled after SciPy's special module.
:toctree: generated
:nosignatures:

digamma
erf
erfc
erfinv
1 change: 1 addition & 0 deletions docs/source/tensor.rst
@@ -225,6 +225,7 @@ Tensor class reference
Tensor.div_
Tensor.double
Tensor.dtype
Tensor.digamma
Tensor.element_size
Tensor.eq
Tensor.equal
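With these docs entries, digamma is exposed both as a oneflow function and as a Tensor method (the commits also add flow.digamma and a psi alias). A minimal usage sketch — the exact printed formatting is an assumption; the values are the mathematical expectations, psi(1) = -gamma ≈ -0.5772 and psi(0.5) = -gamma - 2 ln 2 ≈ -1.9635:

import oneflow as flow

x = flow.tensor([1.0, 0.5])
print(flow.digamma(x))  # expect roughly tensor([-0.5772, -1.9635])
print(x.digamma())      # same result via the Tensor method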
2 changes: 2 additions & 0 deletions oneflow/api/python/framework/tensor_functions.cpp
@@ -200,6 +200,7 @@ PyNumberMethods PyTensorObject_as_number = {
}

UNARY_METHOD(PyTensorObject_abs, functional::Abs);
UNARY_METHOD(PyTensorObject_digamma, functional::Digamma);
UNARY_METHOD(PyTensorObject_exp, functional::Exp);
UNARY_METHOD(PyTensorObject_exp2, functional::Exp2);
UNARY_METHOD(PyTensorObject_floor, functional::Floor);
@@ -1102,6 +1103,7 @@ PyMethodDef PyTensorObject_extra_methods[] = {

// macro UNARY_METHOD
{"abs", PyTensorObject_abs, METH_NOARGS, NULL},
{"digamma", PyTensorObject_digamma, METH_NOARGS, NULL},
{"exp", PyTensorObject_exp, METH_NOARGS, NULL},
{"exp2", PyTensorObject_exp2, METH_NOARGS, NULL},
{"floor", PyTensorObject_floor, METH_NOARGS, NULL},
6 changes: 6 additions & 0 deletions oneflow/core/common/math_util.h
@@ -21,6 +21,12 @@ limitations under the License.

namespace oneflow {

/*
* math constants
*/
template<typename T>
constexpr T pi = static_cast<T>(3.141592653589793238462643383279502);

int64_t Gcd(int64_t m, int64_t n);

int64_t Lcm(int64_t m, int64_t n);
19 changes: 10 additions & 9 deletions oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
@@ -140,15 +140,16 @@ inline bool IsDimsEquals(size_t num_src0_dims, const int64_t* src0_dims, size_t
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kAtanhBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCosBackwardWithDyX)

#define BINARY_MATH_BACKWARD_OP_SEQ_1 \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \
#define BINARY_MATH_BACKWARD_OP_SEQ_1 \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \
OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLog2BackwardWithDyX)

#define BINARY_MATH_BACKWARD_OP_SEQ_2 \
1 change: 1 addition & 0 deletions oneflow/core/ep/common/primitive/elementwise_unary.h
@@ -54,6 +54,7 @@ namespace primitive {
OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCeil) \
OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCos) \
OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCosh) \
OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kDigamma) \
OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kErf) \
OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kErfc) \
OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kExp) \
10 changes: 10 additions & 0 deletions oneflow/core/ep/cpu/primitive/binary_functor.h
@@ -353,6 +353,16 @@ struct BinaryFunctor<DeviceType::kCPU, BinaryOp::kErfcBackwardWithDyX, Src, Dst>
}
};

template<typename Src, typename Dst>
struct BinaryFunctor<DeviceType::kCPU, BinaryOp::kDigammaBackwardWithDyX, Src, Dst> {
OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
// TODO(shijiaxing): The backward of digamma is the trigamma function; it will be implemented soon.
UNIMPLEMENTED();
return 0;
}
};
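As the TODO notes, the digamma backward is the trigamma function psi_1(x) = d/dx psi(x), so DigammaGrad will ultimately compute dy * trigamma(x). A hedged Python sketch of trigamma in the same Cephes style as the rest of this PR (reflection below 0.5, a short recurrence, an asymptotic tail) — an illustration of the math, not the kernel this PR ships:

import math

def trigamma(x: float) -> float:
    # psi_1(x) = d/dx digamma(x); the digamma grad is dy * trigamma(x)
    sign, result = 1.0, 0.0
    if x < 0.5:
        # reflection: psi_1(x) + psi_1(1 - x) = pi^2 / sin^2(pi * x)
        sign = -1.0
        s = math.sin(math.pi * x)
        result -= (math.pi * math.pi) / (s * s)
        x = 1.0 - x
    for _ in range(6):  # recurrence: psi_1(x) = psi_1(x + 1) + 1 / x^2
        result += 1.0 / (x * x)
        x += 1.0
    ixx = 1.0 / (x * x)
    # asymptotic expansion in powers of 1/x^2
    result += (1.0 + 1.0 / (2.0 * x)
               + ixx * (1.0 / 6 - ixx * (1.0 / 30 - ixx * (1.0 / 42)))) / x
    return sign * result

assert abs(trigamma(1.0) - math.pi ** 2 / 6) < 1e-8  # psi_1(1) = pi^2 / 6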

#define SPECIALIZATION_CPU_BINARY_FUNCTOR(op, type) \
template<> \
struct BinaryFunctor<DeviceType::kCPU, op, type, type> { \
135 changes: 135 additions & 0 deletions oneflow/core/ep/cpu/primitive/unary_functor.h
@@ -15,6 +15,7 @@ limitations under the License.
*/
#include "oneflow/core/ep/common/primitive/unary_functor.h"
#include "oneflow/core/ep/cpu/primitive/type_seq.h"
#include "oneflow/core/common/math_util.h"

namespace oneflow {
namespace ep {
@@ -120,6 +121,139 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kRsqrt, Dst, Src> {
}
};

template<>
struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, float, float> {
OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}

OF_DEVICE_FUNC float operator()(float src) const {
Review comment (Contributor):
Where is this implementation referenced from? Could you add a link in a comment?

Reply (Contributor Author):
Added.

// references
// https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L434-L487
const auto& calc_digamma = [](float x) {
std::function<float(float)> compute;
compute = [&](float x) {
static float PSI_10 = 2.25175258906672110764f;
if (x == 0) {
// As per C++ standard for gamma related functions and SciPy,
// If the argument is ±0, ±∞ is returned
return std::copysign(INFINITY, -x);
}

bool x_is_integer = x == truncf(x);
if (x < 0) {
if (x_is_integer) {
// As per C++ standard for gamma related functions and SciPy,
// If the argument is a negative integer, NaN is returned
return std::numeric_limits<float>::quiet_NaN();
}
// Extracts the fractional part of x as r, since tan(pi * r) is more numerically
// accurate than tan(pi * x). While these operations are mathematically equivalent
// since both x and r are in radians and tan() has a periodicity of pi, in practice
// the computation of pi * x is a source of error (when |x| > 1).
double q, r;
r = std::modf(x, &q);
float pi_over_tan_pi_x = (float)(pi<double> / tan(pi<double> * r));
return compute(1 - x) - pi_over_tan_pi_x;
}

// Push x to be >= 10
float result = 0;
while (x < 10) {
result -= 1 / x;
x += 1;
}
if (x == 10) { return result + PSI_10; }

// Compute asymptotic digamma
static const float A[] = {
8.33333333333333333333E-2f, -2.10927960927960927961E-2f, 7.57575757575757575758E-3f,
-4.16666666666666666667E-3f, 3.96825396825396825397E-3f, -8.33333333333333333333E-3f,
8.33333333333333333333E-2f,
};

float y = 0;
if (x < 1.0e17f) {
float z = 1 / (x * x);
float polevl_result = 0;
for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
y = z * polevl_result;
}
return result + logf(x) - (0.5f / x) - y;
};

return compute(x);
};

return calc_digamma(src);
}
};
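The special cases commented above are worth pinning down: an argument of ±0 maps to ∓∞ via copysign(INFINITY, -x), and the negative integers are poles that return NaN. A self-contained Python mirror of just those branches:

import math

def digamma_special_cases(x: float):
    if x == 0:
        return math.copysign(math.inf, -x)  # +0 -> -inf, -0 -> +inf
    if x < 0 and x == math.trunc(x):
        return math.nan  # poles at the negative integers
    return None  # otherwise fall through to the series computation

assert digamma_special_cases(0.0) == -math.inf
assert digamma_special_cases(-0.0) == math.inf
assert math.isnan(digamma_special_cases(-4.0))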

template<>
struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, double, double> {
OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}

OF_DEVICE_FUNC double operator()(double src) const {
// references
// https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L376-L428
const auto& calc_digamma = [](double x) {
std::function<double(double)> compute;
compute = [&](double x) {
static double PSI_10 = 2.25175258906672110764;
if (x == 0) {
// As per C++ standard for gamma related functions and SciPy,
// If the argument is ±0, ±∞ is returned
return std::copysign(INFINITY, -x);
}

bool x_is_integer = x == trunc(x);
if (x < 0) {
if (x_is_integer) {
// As per C++ standard for gamma related functions and SciPy,
// If the argument is a negative integer, NaN is returned
return std::numeric_limits<double>::quiet_NaN();
}
// Extracts the fractional part of x as r, since tan(pi * r) is more numerically
// accurate than tan(pi * x). While these operations are mathematically equivalent
// since both x and r are in radians and tan() has a periodicity of pi, in practice
// the computation of pi * x is a source of error (when |x| > 1).
double q, r;
r = std::modf(x, &q);
return compute(1 - x) - pi<double> / tan(pi<double> * r);
}

// Push x to be >= 10
double result = 0;
while (x < 10) {
result -= 1 / x;
x += 1;
}
if (x == 10) { return result + PSI_10; }

// Compute asymptotic digamma
static const double A[] = {
8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3,
-4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3,
8.33333333333333333333E-2,
};

double y = 0;
if (x < 1.0e17) {
double z = 1.0 / (x * x);
// y = z * polevl(z, A, 6);

double polevl_result = 0;
for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
y = z * polevl_result;
}
return result + log(x) - (0.5 / x) - y;
};

return compute(x);
};

return calc_digamma(src);
}
};
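For readers who want to sanity-check the algorithm outside the C++ build, here is a near line-for-line Python port of the double-precision path above, compared against SciPy (assuming SciPy is installed; the tolerance is a judgment call):

import math
from scipy import special

def calc_digamma(x: float) -> float:
    PSI_10 = 2.25175258906672110764
    if x == 0:
        return math.copysign(math.inf, -x)  # +-0 -> -+inf
    if x < 0:
        if x == math.trunc(x):
            return math.nan  # poles at the negative integers
        r = x - math.trunc(x)  # fractional part, as std::modf
        # reflection: psi(x) = psi(1 - x) - pi / tan(pi * r)
        return calc_digamma(1 - x) - math.pi / math.tan(math.pi * r)
    result = 0.0
    while x < 10:  # recurrence pushes x up to >= 10
        result -= 1 / x
        x += 1
    if x == 10:
        return result + PSI_10
    A = [8.33333333333333333333e-2, -2.10927960927960927961e-2,
         7.57575757575757575758e-3, -4.16666666666666666667e-3,
         3.96825396825396825397e-3, -8.33333333333333333333e-3,
         8.33333333333333333333e-2]
    y = 0.0
    if x < 1.0e17:
        z = 1 / (x * x)
        p = 0.0
        for a in A:  # Horner form of the commented-out polevl(z, A, 6)
            p = p * z + a
        y = z * p
    return result + math.log(x) - 0.5 / x - y

for v in (0.5, 1.0, 3.7, -2.3):
    assert math.isclose(calc_digamma(v), float(special.digamma(v)), rel_tol=1e-10)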

template<>
struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kAbs, bfloat16, bfloat16> {
OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
@@ -187,6 +321,7 @@ SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kReciprocalNoNan);
SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero);
SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu);
SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu);
SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma);

template<>
struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kIsInf, bool, bfloat16> {
10 changes: 10 additions & 0 deletions oneflow/core/ep/cuda/primitive/binary_functor.cuh
@@ -240,6 +240,16 @@ struct BinaryFunctor<DeviceType::kCUDA, BinaryOp::kIsClose, Src, Dst> {
float atol, rtol;
};

template<typename Src, typename Dst>
struct BinaryFunctor<DeviceType::kCUDA, BinaryOp::kDigammaBackwardWithDyX, Src, Dst> {
OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
// TODO(shijiaxing): The backward of digamma is the trigamma function; it will be implemented soon.
assert(false);
return static_cast<Dst>(0.0);
}
};

#define SPECIALIZATION_INTEGRAL_CLOSENESS_BINARY_FUNCTOR(op, type) \
template<typename Dst> \
struct BinaryFunctor<DeviceType::kCUDA, op, type, Dst> { \
62 changes: 62 additions & 0 deletions oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -18,6 +18,7 @@ limitations under the License.
#include "oneflow/core/cuda/elementwise.cuh"
#include "oneflow/core/ep/cuda/cuda_stream.h"
#include <cuda.h>
#include "oneflow/core/common/math_util.h"

namespace oneflow {
namespace ep {
@@ -223,6 +224,65 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kTrunc, double, double> {
OF_DEVICE_FUNC double operator()(double src) const { return trunc(src); }
};

template<typename Dst, typename Src>
struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}

OF_DEVICE_FUNC Dst operator()(Src in) const {
// references
// https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/cuda/Math.cuh#L3029-L3090
static const double PI_f64 = 3.14159265358979323846;
const Src PSI_10 = 2.25175258906672110764;
const Src A[] = {
8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3,
-4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3,
8.33333333333333333333E-2,
};

Src x = static_cast<Src>(in);
if (x == static_cast<Src>(0)) {
// As per C++ standard for gamma related functions and SciPy,
// If the argument is ±0, ±∞ is returned
return std::copysign(static_cast<Src>(INFINITY), -x);
}

bool x_is_integer = x == trunc(x);
Src result = static_cast<Src>(0);
if (x < 0) {
if (x_is_integer) {
// As per C++ standard for gamma related functions and SciPy,
// If the argument is a negative integer, NaN is returned
return static_cast<Src>(NAN);
}
// Extracts the fractional part of x as r, since tan(pi * r) is more numerically
// accurate than tan(pi * x). While these operations are mathematically equivalent
// since both x and r are in radians and tan() has a periodicity of pi, in practice
// the computation of pi * x is a source of error (when |x| > 1).
double q, r;
r = modf(static_cast<double>(x), &q);
result = static_cast<Src>(-PI_f64 / tan(PI_f64 * r));
x = static_cast<Src>(1) - x;
}

while (x < 10) {
result -= static_cast<Src>(1) / x;
x += 1;
}
if (x == static_cast<Src>(10)) { return static_cast<Src>(result + PSI_10); }

Src y = 0;
if (x < 1.0e17) {
Src z = static_cast<Src>(1) / (x * x);

Src polevl_result = 0;
for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
y = z * polevl_result;
}

return static_cast<Src>(log(x) - (static_cast<Src>(0.5) / x) - y + result);
}
};

template<>
struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kAbs, half, half> {
OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
@@ -351,6 +411,7 @@ SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kAtanh);
SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCeil);
SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCos);
SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCosh);
SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kDigamma);
SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kErf);
SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kErfc);
SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kExp);
@@ -443,6 +504,7 @@ SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero);
SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNanAssign);
SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu);
SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu);
SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma);
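The "pseudo" half/bfloat16 specializations suggest the compute-in-float pattern: the low-precision input is upcast, the float functor runs, and the result is cast back. A small NumPy sketch of that pattern, using SciPy's digamma as a stand-in for the float kernel (the upcast behavior here is an assumption drawn from the macro names, not shown in this diff):

import numpy as np
from scipy import special

def digamma_pseudo_half(x: np.ndarray) -> np.ndarray:
    # upcast -> compute in float32 -> round back to float16
    return special.digamma(x.astype(np.float32)).astype(np.float16)

print(digamma_pseudo_half(np.array([1.0, 0.5], dtype=np.float16)))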

template<>
struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kIsInf, bool, nv_bfloat16> {
1 change: 1 addition & 0 deletions oneflow/core/ep/include/primitive/binary_op.h
@@ -92,6 +92,7 @@ enum class BinaryOp {
kExp2BackwardWithDyX,
kExpm1BackwardWithDyX,
kLgammaBackwardWithDyX,
kDigammaBackwardWithDyX,
kLogBackwardWithDyX,
kLog2BackwardWithDyX,
kLog10BackwardWithDyX,
1 change: 1 addition & 0 deletions oneflow/core/ep/include/primitive/unary_op.h
@@ -54,6 +54,7 @@ enum class UnaryOp {
kCeil,
kCos,
kCosh,
kDigamma,
kErf,
kErfc,
kExp,
8 changes: 8 additions & 0 deletions oneflow/core/functional/functional_api.yaml
@@ -3338,3 +3338,11 @@
- name: "frac_"
signature: "Tensor (Tensor x) => FracInplace"
bind_python: True

- name: "digamma"
signature: "Tensor (Tensor x) => Digamma"
bind_python: True

- name: "digamma_grad"
signature: "Tensor (Tensor x, Tensor dy) => DigammaGrad"
bind_python: False
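Note that digamma_grad is not bound to Python (bind_python: False); it only backs the autograd graph, and per the TODOs above its trigamma kernel is still unimplemented in this PR. Once it lands, the gradient should satisfy d/dx psi(x) = psi_1(x); a finite-difference sanity check against SciPy, as a sketch of how one might verify:

import math
from scipy import special

def fd_digamma_grad(x: float, h: float = 1e-6) -> float:
    # central difference of digamma should approximate trigamma
    return float(special.digamma(x + h) - special.digamma(x - h)) / (2 * h)

for v in (0.7, 1.0, 5.5):
    assert math.isclose(fd_digamma_grad(v), float(special.polygamma(1, v)), rel_tol=1e-4)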