From cb8ffeaa066564a76b3fbda8d870b0d8e4d8c269 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sat, 1 Apr 2023 12:13:25 +0000
Subject: [PATCH 01/21] digamma  op  dev

---
 oneflow/core/common/math_util.cpp             | 112 ++++++++++++++++++
 oneflow/core/common/math_util.h               |  69 +++++++++++
 .../primitive/broadcast_elementwise_binary.h  |  19 +--
 .../ep/common/primitive/elementwise_unary.h   |   1 +
 .../core/ep/cpu/primitive/binary_functor.h    |  10 ++
 oneflow/core/ep/cpu/primitive/unary_functor.h |  10 +-
 .../core/ep/cuda/primitive/binary_functor.cuh |  10 ++
 .../core/ep/cuda/primitive/unary_functor.cuh  |  29 +++++
 oneflow/core/ep/include/primitive/binary_op.h |   1 +
 oneflow/core/ep/include/primitive/unary_op.h  |   1 +
 oneflow/core/functional/functional_api.yaml   |  11 +-
 .../core/functional/impl/unary_functor.cpp    |   4 +-
 oneflow/ir/include/OneFlow/OneFlowUserOps.td  |  26 ++++
 ...ath_unary_elementwise_primitive_kernel.cpp |   2 +
 oneflow/user/ops/math_unary_elementwise_seq.h |   2 +
 python/oneflow/special/__init__.py            |   1 +
 python/oneflow/special/special_ops.py         |   3 +
 17 files changed, 299 insertions(+), 12 deletions(-)
diff --git a/oneflow/core/common/math_util.cpp b/oneflow/core/common/math_util.cpp
index a60f37b6400..f5ce19d4491 100644
--- a/oneflow/core/common/math_util.cpp
+++ b/oneflow/core/common/math_util.cpp
@@ -29,4 +29,116 @@ int64_t Gcd(int64_t m, int64_t n) {
 
 int64_t Lcm(int64_t m, int64_t n) { return m * n / Gcd(m, n); }
 
+template<typename T>
+T polevl(const T x, const T A[], size_t len) {
+  T result = 0;
+  for (size_t i = 0; i <= len; i++) { result = result * x + A[i]; }
+  return result;
+}
+
+/*
+ * This function is derived from the implementation of the digamma function in the Cephes Math
+ * Library. See note [3-Clause BSD License for the Cephes Math Library].
+ */
+
+double calc_digamma_cpu(double x) {
+  static const double PSI_10 = 2.25175258906672110764;  // returned (plus `result`) when x == 10
+  if (x == 0) {
+    // Pole at zero, following the C++ standard for gamma related functions and SciPy:
+    // the sign is flipped, so +0 yields -infinity and -0 yields +infinity.
+    return std::copysign(INFINITY, -x);
+  }
+
+  bool x_is_integer = x == trunc(x);
+  if (x < 0) {
+    if (x_is_integer) {
+      // As per C++ standard for gamma related functions and SciPy,
+      // a negative integer argument yields NaN.
+      return std::numeric_limits<double>::quiet_NaN();
+    }
+    // Reflection: digamma(x) = digamma(1 - x) - pi / tan(pi * x). The fractional part r of x
+    // is used instead of x because tan() has period pi, so tan(pi * r) equals tan(pi * x)
+    // mathematically, while computing pi * x directly is a source of rounding error
+    // (when |x| > 1).
+    double q, r;
+    r = std::modf(x, &q);
+    return calc_digamma_cpu(1 - x) - pi<double> / tan(pi<double> * r);
+  }
+
+  // Shift x up until x >= 10 using the recurrence digamma(x) = digamma(x + 1) - 1 / x
+  double result = 0;
+  while (x < 10) {
+    result -= 1 / x;
+    x += 1;
+  }
+  if (x == 10) { return result + PSI_10; }
+
+  // Asymptotic series: log(x) - 0.5 / x - z * P(z) with z = 1 / x^2 and P given by A
+  static const double A[] = {
+      8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
+      -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
+      8.33333333333333333333E-2,
+  };
+
+  double y = 0;  // polynomial correction; stays 0 when x >= 1.0e17
+  if (x < 1.0e17) {
+    double z = 1.0 / (x * x);
+    y = z * polevl(z, A, 6);
+  }
+  return result + log(x) - (0.5 / x) - y;
+}
+
+/*
+ * This function is derived from the implementation of the digamma function in the Cephes Math
+ * Library. See note [3-Clause BSD License for the Cephes Math Library].
+ */
+
+float calc_digamma_cpu(float x) {
+  static const float PSI_10 = 2.25175258906672110764f;  // returned (plus `result`) when x == 10
+  if (x == 0) {
+    // Pole at zero, following the C++ standard for gamma related functions and SciPy:
+    // the sign is flipped, so +0 yields -infinity and -0 yields +infinity.
+    return std::copysign(INFINITY, -x);
+  }
+
+  bool x_is_integer = x == truncf(x);
+  if (x < 0) {
+    if (x_is_integer) {
+      // As per C++ standard for gamma related functions and SciPy,
+      // a negative integer argument yields NaN.
+      return std::numeric_limits<float>::quiet_NaN();
+    }
+    // Reflection: digamma(x) = digamma(1 - x) - pi / tan(pi * x). The fractional part r of x
+    // is used instead of x because tan() has period pi, so tan(pi * r) equals tan(pi * x)
+    // mathematically, while computing pi * x directly is a source of rounding error
+    // (when |x| > 1). The tangent term is evaluated in double and then narrowed to float.
+    double q, r;
+    r = std::modf(x, &q);
+    float pi_over_tan_pi_x = static_cast<float>(pi<double> / tan(pi<double> * r));
+    return calc_digamma_cpu(1 - x) - pi_over_tan_pi_x;
+  }
+
+  // Shift x up until x >= 10 using the recurrence digamma(x) = digamma(x + 1) - 1 / x
+  float result = 0;
+  while (x < 10) {
+    result -= 1 / x;
+    x += 1;
+  }
+  if (x == 10) { return result + PSI_10; }
+
+  // Asymptotic series: log(x) - 0.5 / x - z * P(z) with z = 1 / x^2 and P given by A
+  static const float A[] = {
+      8.33333333333333333333E-2f,  -2.10927960927960927961E-2f, 7.57575757575757575758E-3f,
+      -4.16666666666666666667E-3f, 3.96825396825396825397E-3f,  -8.33333333333333333333E-3f,
+      8.33333333333333333333E-2f,
+  };
+
+  float y = 0;  // polynomial correction; stays 0 when x >= 1.0e17f
+  if (x < 1.0e17f) {
+    float z = 1 / (x * x);
+    y = z * polevl(z, A, 6);
+  }
+  return result + logf(x) - (0.5f / x) - y;
+}
+
 }  // namespace oneflow
diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h
index b0361f90c73..7aba95e5cdb 100644
--- a/oneflow/core/common/math_util.h
+++ b/oneflow/core/common/math_util.h
@@ -25,6 +25,67 @@ int64_t Gcd(int64_t m, int64_t n);
 
 int64_t Lcm(int64_t m, int64_t n);
 
+template<typename T>
+ T polevl(const T x, const T A[], size_t len);
+
+// This function references pytorch/aten/src/ATen/native/Math.h
+double calc_digamma_cpu(double x);
+
+float calc_digamma_cpu(float x);
+
+template<typename scalar_t, typename accscalar_t>
+static OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) {
+  static const double PI_f64 = 3.14159265358979323846;
+  const accscalar_t PSI_10 = 2.25175258906672110764;
+  const accscalar_t A[] = {
+      8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
+      -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
+      8.33333333333333333333E-2,
+  };
+
+  accscalar_t x = static_cast<accscalar_t>(in);
+  if (x == static_cast<accscalar_t>(0)) {
+    // As per C++ standard for gamma related functions and SciPy,
+    // If the argument is ±0, ±∞ is returned
+    return std::copysign(static_cast<scalar_t>(INFINITY), -x);
+  }
+
+  bool x_is_integer = x == trunc(x);
+  accscalar_t result = static_cast<accscalar_t>(0);
+  if (x < 0) {
+    if (x_is_integer) {
+      // As per C++ standard for gamma related functions and SciPy,
+      // If the argument is a negative integer, NaN is returned
+      return static_cast<scalar_t>(NAN);
+    }
+    // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
+    // accurate than tan(pi * x). While these operations are mathematically equivalent
+    // since both x and r are in radians and tan() has a periodicity of pi, in practice
+    // the computation of pi * x is a source of error (when |x| > 1).
+    double q, r;
+    r = modf(static_cast<double>(x), &q);
+    result = static_cast<accscalar_t>(-PI_f64 / tan(PI_f64 * r));
+    x = static_cast<accscalar_t>(1) - x;
+  }
+
+  while (x < 10) {
+    result -= static_cast<accscalar_t>(1) / x;
+    x += 1;
+  }
+  if (x == static_cast<accscalar_t>(10)) { return static_cast<scalar_t>(result + PSI_10); }
+
+  accscalar_t y = 0;
+  if (x < 1.0e17) {
+    accscalar_t z = static_cast<accscalar_t>(1) / (x * x);
+
+    accscalar_t polevl_result = 0;
+    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
+    y = z * polevl_result;
+  }
+
+  return static_cast<scalar_t>(log(x) - (static_cast<accscalar_t>(0.5) / x) - y + result);
+}
+
 template<typename T>
 OF_DEVICE_FUNC T DeviceMin(T a, T b) {
 #if defined(__CUDA_ARCH__)
@@ -43,6 +104,14 @@ OF_DEVICE_FUNC T DeviceMax(T a, T b) {
 #endif
 }
 
+template<typename T>
+constexpr T pi = static_cast<T>(3.141592653589793238462643383279502);
+
+// template <typename T>
+// inline constexpr T pi() {
+//   return static_cast<T>(3.141592653589793238462643383279502);
+// }
+
 }  // namespace oneflow
 
 #endif  // ONEFLOW_CORE_COMMON_MATH_UTIL_H_
diff --git a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
index 70c1382a559..2b4ad33a9d1 100644
--- a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
+++ b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
@@ -131,15 +131,16 @@ inline bool IsDimsEquals(size_t num_src0_dims, const int64_t* src0_dims, size_t
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kAtanhBackwardWithDyX) \
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCosBackwardWithDyX)
 
-#define BINARY_MATH_BACKWARD_OP_SEQ_1                    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX)   \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX)    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX)   \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX)    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX)   \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX)  \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX)    \
+#define BINARY_MATH_BACKWARD_OP_SEQ_1                     \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX)     \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX)     \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX)   \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX)  \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX) \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX)     \
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLog2BackwardWithDyX)
 
 #define BINARY_MATH_BACKWARD_OP_SEQ_2                             \
diff --git a/oneflow/core/ep/common/primitive/elementwise_unary.h b/oneflow/core/ep/common/primitive/elementwise_unary.h
index d5d14ef6c80..136b587d9c8 100644
--- a/oneflow/core/ep/common/primitive/elementwise_unary.h
+++ b/oneflow/core/ep/common/primitive/elementwise_unary.h
@@ -54,6 +54,7 @@ namespace primitive {
   OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCeil)            \
   OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCos)             \
   OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCosh)            \
+  OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kDigamma)         \
   OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kErf)             \
   OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kErfc)            \
   OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kExp)             \
diff --git a/oneflow/core/ep/cpu/primitive/binary_functor.h b/oneflow/core/ep/cpu/primitive/binary_functor.h
index 4b25663a73a..2c748c58968 100644
--- a/oneflow/core/ep/cpu/primitive/binary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/binary_functor.h
@@ -353,6 +353,16 @@ struct BinaryFunctor<DeviceType::kCPU, BinaryOp::kErfcBackwardWithDyX, Src, Dst>
   }
 };
 
+template<typename Src, typename Dst>
+struct BinaryFunctor<DeviceType::kCPU, BinaryOp::kDigammaBackwardWithDyX, Src, Dst> {
+  OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
+  OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
+    // TODO:shijiaxing： This function is named trigamma, it will be implemented soon.
+    assert(false);
+    return 0;
+  }
+};
+
 #define SPECIALIZATION_CPU_BINARY_FUNCTOR(op, type)                                          \
   template<>                                                                                 \
   struct BinaryFunctor<DeviceType::kCPU, op, type, type> {                                   \
diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h
index b3119769644..cc0b94d33a8 100644
--- a/oneflow/core/ep/cpu/primitive/unary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/unary_functor.h
@@ -15,7 +15,7 @@ limitations under the License.
 */
 #include "oneflow/core/ep/common/primitive/unary_functor.h"
 #include "oneflow/core/ep/cpu/primitive/type_seq.h"
-
+#include "oneflow/core/common/math_util.h"
 namespace oneflow {
 namespace ep {
 namespace primitive {
@@ -120,6 +120,13 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kRsqrt, Dst, Src> {
   }
 };
 
+template<typename Dst, typename Src>
+struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, Dst, Src> {
+  OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
+
+  OF_DEVICE_FUNC Dst operator()(Src src) const { return static_cast<Dst>(calc_digamma_cpu(src)); }
+};
+
 template<>
 struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kAbs, bfloat16, bfloat16> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
@@ -187,6 +194,7 @@ SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kReciprocalNoNan);
 SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero);
 SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu);
 SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu);
+SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma);
 
 template<>
 struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kIsInf, bool, bfloat16> {
diff --git a/oneflow/core/ep/cuda/primitive/binary_functor.cuh b/oneflow/core/ep/cuda/primitive/binary_functor.cuh
index d62f50e4628..252eb3b418f 100644
--- a/oneflow/core/ep/cuda/primitive/binary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/binary_functor.cuh
@@ -240,6 +240,16 @@ struct BinaryFunctor<DeviceType::kCUDA, BinaryOp::kIsClose, Src, Dst> {
   float atol, rtol;
 };
 
+template<typename Src, typename Dst>
+struct BinaryFunctor<DeviceType::kCUDA, BinaryOp::kDigammaBackwardWithDyX, Src, Dst> {
+  OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
+  OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
+    // TODO:shijiaxing： This function is named trigamma, it will be implemented soon.
+    assert(false);
+    return static_cast<Dst>(0.0);
+  }
+};
+
 #define SPECIALIZATION_INTEGRAL_CLOSENESS_BINARY_FUNCTOR(op, type)                            \
   template<typename Dst>                                                                      \
   struct BinaryFunctor<DeviceType::kCUDA, op, type, Dst> {                                    \
diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index 91196a6c382..820a135d0e5 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -18,6 +18,7 @@ limitations under the License.
 #include "oneflow/core/cuda/elementwise.cuh"
 #include "oneflow/core/ep/cuda/cuda_stream.h"
 #include <cuda.h>
+#include "oneflow/core/common/math_util.h"
 
 namespace oneflow {
 namespace ep {
@@ -223,6 +224,33 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kTrunc, double, double> {
   OF_DEVICE_FUNC double operator()(double src) const { return trunc(src); }
 };
 
+
+template<typename Dst, typename Src>
+struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
+  OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
+
+  OF_DEVICE_FUNC Dst operator()(Src src) const {
+    return static_cast<Dst>(calc_digamma_cuda<Src,Src>(src));
+  }
+};
+
+template<>
+struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, half, half> {
+  OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
+
+  OF_DEVICE_FUNC half operator()(half src) const {
+    return  calc_digamma_cuda<half,float>(src);
+  }
+};
+// template<>
+// struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, double, double> {
+//   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
+
+//   OF_DEVICE_FUNC double operator()(double src) const {
+//     return calc_digamma_cuda<double>(src);
+//   }
+// };
+
 template<>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kAbs, half, half> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
@@ -443,6 +471,7 @@ SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero);
 SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNanAssign);
 SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu);
 SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu);
+SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma);
 
 template<>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kIsInf, bool, nv_bfloat16> {
diff --git a/oneflow/core/ep/include/primitive/binary_op.h b/oneflow/core/ep/include/primitive/binary_op.h
index 6447d7baf3e..7f003e6c025 100644
--- a/oneflow/core/ep/include/primitive/binary_op.h
+++ b/oneflow/core/ep/include/primitive/binary_op.h
@@ -92,6 +92,7 @@ enum class BinaryOp {
   kExp2BackwardWithDyX,
   kExpm1BackwardWithDyX,
   kLgammaBackwardWithDyX,
+  kDigammaBackwardWithDyX,
   kLogBackwardWithDyX,
   kLog2BackwardWithDyX,
   kLog10BackwardWithDyX,
diff --git a/oneflow/core/ep/include/primitive/unary_op.h b/oneflow/core/ep/include/primitive/unary_op.h
index b20bbb28760..b487cf25cc7 100644
--- a/oneflow/core/ep/include/primitive/unary_op.h
+++ b/oneflow/core/ep/include/primitive/unary_op.h
@@ -54,6 +54,7 @@ enum class UnaryOp {
   kCeil,
   kCos,
   kCosh,
+  kDigamma,
   kErf,
   kErfc,
   kExp,
diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml
index 383e4c9f031..b77157a8e9d 100644
--- a/oneflow/core/functional/functional_api.yaml
+++ b/oneflow/core/functional/functional_api.yaml
@@ -3309,4 +3309,13 @@
 
 - name: "frac_"
   signature: "Tensor (Tensor x) => FracInplace"
-  bind_python: True
\ No newline at end of file
+  bind_python: True
+
+- name: "digamma"
+  signature: "Tensor (Tensor x) => Digamma"
+  bind_python: True
+
+- name: "digamma_grad"
+  signature: "Tensor (Tensor x, Tensor dy) => DigammaGrad"
+  bind_python: False
+  
\ No newline at end of file
diff --git a/oneflow/core/functional/impl/unary_functor.cpp b/oneflow/core/functional/impl/unary_functor.cpp
index 6d48b088613..3d1733efce0 100644
--- a/oneflow/core/functional/impl/unary_functor.cpp
+++ b/oneflow/core/functional/impl/unary_functor.cpp
@@ -64,7 +64,8 @@ namespace impl {
   OF_PP_MAKE_TUPLE_SEQ("sqrt", Sqrt)                 \
   OF_PP_MAKE_TUPLE_SEQ("square", Square)             \
   OF_PP_MAKE_TUPLE_SEQ("tan", Tan)                   \
-  OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh)
+  OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh)                 \
+  OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma)
 
 #define FLOAT_UNARY_PRIMITIVE_FUNC_BWD_WITH_DY_Y_SEQ OF_PP_MAKE_TUPLE_SEQ("sigmoid", Sigmoid)
 
@@ -179,6 +180,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
   m.add_functor<CeilFunctor>("Ceil");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Cos, "Cos");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Cosh, "Cosh");
+  ADD_UNARY_FUNCTOR_WITH_DY_X(Digamma,"Digamma");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Erf, "Erf");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Erfc, "Erfc");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Exp, "Exp");
diff --git a/oneflow/ir/include/OneFlow/OneFlowUserOps.td b/oneflow/ir/include/OneFlow/OneFlowUserOps.td
index edbe87d871f..4444fff724c 100644
--- a/oneflow/ir/include/OneFlow/OneFlowUserOps.td
+++ b/oneflow/ir/include/OneFlow/OneFlowUserOps.td
@@ -4584,6 +4584,32 @@ def OneFlow_LgammaGradOp : OneFlow_BaseOp<"lgamma_grad", [NoSideEffect, DeclareO
   let has_data_type_infer_fn = 1;
 }
 
+def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
+  let input = (ins
+    OneFlow_Tensor:$x
+  );
+  let output = (outs
+    OneFlow_Tensor:$y
+  );
+  let has_logical_tensor_desc_infer_fn = 1;
+  let has_physical_tensor_desc_infer_fn = 1;
+  let has_get_sbp_fn = 1;
+  let has_data_type_infer_fn = 1;
+}
+
+def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
+  let input = (ins
+    OneFlow_Tensor:$x
+  );
+  let output = (outs
+    OneFlow_Tensor:$y
+  );
+  let has_logical_tensor_desc_infer_fn = 1;
+  let has_physical_tensor_desc_infer_fn = 1;
+  let has_get_sbp_fn = 1;
+  let has_data_type_infer_fn = 1;
+}
+
 def OneFlow_LogOp : OneFlow_BaseOp<"log", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
   let input = (ins
     OneFlow_Tensor:$x
diff --git a/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp b/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp
index a8a961e91db..2bfeb4c3374 100644
--- a/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp
+++ b/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp
@@ -30,6 +30,7 @@ namespace oneflow {
   OF_PP_MAKE_TUPLE_SEQ("ceil", ep::primitive::UnaryOp::kCeil)                         \
   OF_PP_MAKE_TUPLE_SEQ("cos", ep::primitive::UnaryOp::kCos)                           \
   OF_PP_MAKE_TUPLE_SEQ("cosh", ep::primitive::UnaryOp::kCosh)                         \
+  OF_PP_MAKE_TUPLE_SEQ("digamma", ep::primitive::UnaryOp::kDigamma)                   \
   OF_PP_MAKE_TUPLE_SEQ("erf", ep::primitive::UnaryOp::kErf)                           \
   OF_PP_MAKE_TUPLE_SEQ("erfc", ep::primitive::UnaryOp::kErfc)                         \
   OF_PP_MAKE_TUPLE_SEQ("exp", ep::primitive::UnaryOp::kExp)                           \
@@ -68,6 +69,7 @@ namespace oneflow {
   OF_PP_MAKE_TUPLE_SEQ("atanh_grad", ep::primitive::BinaryOp::kAtanhBackwardWithDyX)            \
   OF_PP_MAKE_TUPLE_SEQ("cos_grad", ep::primitive::BinaryOp::kCosBackwardWithDyX)                \
   OF_PP_MAKE_TUPLE_SEQ("cosh_grad", ep::primitive::BinaryOp::kCoshBackwardWithDyX)              \
+  OF_PP_MAKE_TUPLE_SEQ("digamma_grad", ep::primitive::BinaryOp::kDigammaBackwardWithDyX)        \
   OF_PP_MAKE_TUPLE_SEQ("erf_grad", ep::primitive::BinaryOp::kErfBackwardWithDyX)                \
   OF_PP_MAKE_TUPLE_SEQ("erfc_grad", ep::primitive::BinaryOp::kErfcBackwardWithDyX)              \
   OF_PP_MAKE_TUPLE_SEQ("exp_grad", ep::primitive::BinaryOp::kExpBackwardWithDyX)                \
diff --git a/oneflow/user/ops/math_unary_elementwise_seq.h b/oneflow/user/ops/math_unary_elementwise_seq.h
index 9cb83ae23e4..43bacc19477 100644
--- a/oneflow/user/ops/math_unary_elementwise_seq.h
+++ b/oneflow/user/ops/math_unary_elementwise_seq.h
@@ -31,6 +31,7 @@ namespace oneflow {
   OF_PP_MAKE_TUPLE_SEQ("ceil", Ceil)                         \
   OF_PP_MAKE_TUPLE_SEQ("cos", Cos)                           \
   OF_PP_MAKE_TUPLE_SEQ("cosh", Cosh)                         \
+  OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma)                   \
   OF_PP_MAKE_TUPLE_SEQ("erf", Erf)                           \
   OF_PP_MAKE_TUPLE_SEQ("erfc", Erfc)                         \
   OF_PP_MAKE_TUPLE_SEQ("exp", Exp)                           \
@@ -75,6 +76,7 @@ namespace oneflow {
   OF_PP_MAKE_TUPLE_SEQ("expm1", Expm1)                          \
   OF_PP_MAKE_TUPLE_SEQ("log", Log)                              \
   OF_PP_MAKE_TUPLE_SEQ("lgamma", Lgamma)                        \
+  OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma)                        \
   OF_PP_MAKE_TUPLE_SEQ("log2", Log2)                            \
   OF_PP_MAKE_TUPLE_SEQ("log10", Log10)                          \
   OF_PP_MAKE_TUPLE_SEQ("log1p", Log1p)                          \
diff --git a/python/oneflow/special/__init__.py b/python/oneflow/special/__init__.py
index 941cd04c1d9..dd3c369e3bc 100644
--- a/python/oneflow/special/__init__.py
+++ b/python/oneflow/special/__init__.py
@@ -25,3 +25,4 @@
 from .special_ops import logsumexp
 from .special_ops import round
 from .special_ops import softmax
+from .special_ops import digamma
diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py
index bcbb038a6fe..0d7017f8b65 100644
--- a/python/oneflow/special/special_ops.py
+++ b/python/oneflow/special/special_ops.py
@@ -59,3 +59,6 @@ def round(x: Tensor):
 
 def softmax(x: Tensor, dim: int):
     return oneflow._C.softmax(x, dim)
+
+def digamma(x:Tensor):
+    return oneflow._C.digamma(x)
\ No newline at end of file

From fec26e3a4e5c0ec8d17d92f9569326e63b8094e6 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sat, 1 Apr 2023 12:13:36 +0000
Subject: [PATCH 02/21] unittest

---
 python/oneflow/test/modules/test_special_ops.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py
index 28e7137b798..7db6a09a60f 100644
--- a/python/oneflow/test/modules/test_special_ops.py
+++ b/python/oneflow/test/modules/test_special_ops.py
@@ -112,6 +112,15 @@ def test_flow_logsumexp_with_random_data(test_case):
         x = random_tensor(4, random(0, 5), 2).to(device)
         y = torch.special.logsumexp(x, dim=np.random.randint(0, 3))
         return y
+    
+    # TODO:shijiaxing  When the grad function be implemented, set "auto_backward=auto"
+    @autotest(n=5, auto_backward=False)
+    def test_flow_digamma_with_random_data(test_case):
+        device = random_device()
+        x_dtype = random_dtype(["arithmetic"])
+        x = random_tensor().to(device).to(x_dtype)
+        y = torch.special.digamma(x)
+        return y
 
 
 if __name__ == "__main__":

From 8aac0c7c8834e4c1abc818cbf7ea3411b16daaa4 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 07:50:55 +0000
Subject: [PATCH 03/21] refine

---
 oneflow/core/common/math_util.h               | 18 ++++++++---------
 .../primitive/broadcast_elementwise_binary.h  | 20 +++++++++----------
 oneflow/core/ep/cpu/primitive/unary_functor.h |  1 +
 .../core/ep/cuda/primitive/unary_functor.cuh  | 14 ++-----------
 .../core/functional/impl/unary_functor.cpp    |  2 +-
 oneflow/ir/include/OneFlow/OneFlowUserOps.td  |  5 +++--
 oneflow/user/ops/math_unary_elementwise_seq.h |  2 +-
 python/oneflow/special/special_ops.py         |  5 +++--
 .../oneflow/test/modules/test_special_ops.py  |  4 ++--
 9 files changed, 31 insertions(+), 40 deletions(-)

diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h
index 7aba95e5cdb..4d8461a15e8 100644
--- a/oneflow/core/common/math_util.h
+++ b/oneflow/core/common/math_util.h
@@ -21,12 +21,18 @@ limitations under the License.
 
 namespace oneflow {
 
+/*
+ * math constants
+ */
+template<typename T>
+constexpr T pi = static_cast<T>(3.141592653589793238462643383279502);
+
 int64_t Gcd(int64_t m, int64_t n);
 
 int64_t Lcm(int64_t m, int64_t n);
 
 template<typename T>
- T polevl(const T x, const T A[], size_t len);
+T polevl(const T x, const T A[], size_t len);
 
 // This function references pytorch/aten/src/ATen/native/Math.h
 double calc_digamma_cpu(double x);
@@ -34,7 +40,7 @@ double calc_digamma_cpu(double x);
 float calc_digamma_cpu(float x);
 
 template<typename scalar_t, typename accscalar_t>
-static OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) {
+OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) {
   static const double PI_f64 = 3.14159265358979323846;
   const accscalar_t PSI_10 = 2.25175258906672110764;
   const accscalar_t A[] = {
@@ -104,14 +110,6 @@ OF_DEVICE_FUNC T DeviceMax(T a, T b) {
 #endif
 }
 
-template<typename T>
-constexpr T pi = static_cast<T>(3.141592653589793238462643383279502);
-
-// template <typename T>
-// inline constexpr T pi() {
-//   return static_cast<T>(3.141592653589793238462643383279502);
-// }
-
 }  // namespace oneflow
 
 #endif  // ONEFLOW_CORE_COMMON_MATH_UTIL_H_
diff --git a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
index 2b4ad33a9d1..9aad1dc068f 100644
--- a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
+++ b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
@@ -131,16 +131,16 @@ inline bool IsDimsEquals(size_t num_src0_dims, const int64_t* src0_dims, size_t
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kAtanhBackwardWithDyX) \
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCosBackwardWithDyX)
 
-#define BINARY_MATH_BACKWARD_OP_SEQ_1                     \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX)    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX)     \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX)    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX)     \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX)    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX)   \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX)  \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX) \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX)     \
+#define BINARY_MATH_BACKWARD_OP_SEQ_1                    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX)   \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX)   \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX)   \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX)  \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX)\
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX)    \
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLog2BackwardWithDyX)
 
 #define BINARY_MATH_BACKWARD_OP_SEQ_2                             \
diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h
index cc0b94d33a8..170d1eafa9d 100644
--- a/oneflow/core/ep/cpu/primitive/unary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/unary_functor.h
@@ -16,6 +16,7 @@ limitations under the License.
 #include "oneflow/core/ep/common/primitive/unary_functor.h"
 #include "oneflow/core/ep/cpu/primitive/type_seq.h"
 #include "oneflow/core/common/math_util.h"
+
 namespace oneflow {
 namespace ep {
 namespace primitive {
diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index 820a135d0e5..3b29695632e 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -224,13 +224,12 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kTrunc, double, double> {
   OF_DEVICE_FUNC double operator()(double src) const { return trunc(src); }
 };
 
-
 template<typename Dst, typename Src>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC Dst operator()(Src src) const {
-    return static_cast<Dst>(calc_digamma_cuda<Src,Src>(src));
+    return static_cast<Dst>(calc_digamma_cuda<Src, Src>(src));
   }
 };
 
@@ -238,18 +237,9 @@ template<>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, half, half> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
-  OF_DEVICE_FUNC half operator()(half src) const {
-    return  calc_digamma_cuda<half,float>(src);
-  }
+  OF_DEVICE_FUNC half operator()(half src) const { return calc_digamma_cuda<half, float>(src); }
 };
-// template<>
-// struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, double, double> {
-//   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
-//   OF_DEVICE_FUNC double operator()(double src) const {
-//     return calc_digamma_cuda<double>(src);
-//   }
-// };
 
 template<>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kAbs, half, half> {
diff --git a/oneflow/core/functional/impl/unary_functor.cpp b/oneflow/core/functional/impl/unary_functor.cpp
index 3d1733efce0..d0afe4ec8bf 100644
--- a/oneflow/core/functional/impl/unary_functor.cpp
+++ b/oneflow/core/functional/impl/unary_functor.cpp
@@ -180,7 +180,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
   m.add_functor<CeilFunctor>("Ceil");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Cos, "Cos");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Cosh, "Cosh");
-  ADD_UNARY_FUNCTOR_WITH_DY_X(Digamma,"Digamma");
+  ADD_UNARY_FUNCTOR_WITH_DY_X(Digamma, "Digamma");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Erf, "Erf");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Erfc, "Erfc");
   ADD_UNARY_FUNCTOR_WITH_DY_X(Exp, "Exp");
diff --git a/oneflow/ir/include/OneFlow/OneFlowUserOps.td b/oneflow/ir/include/OneFlow/OneFlowUserOps.td
index 4444fff724c..3358035dac0 100644
--- a/oneflow/ir/include/OneFlow/OneFlowUserOps.td
+++ b/oneflow/ir/include/OneFlow/OneFlowUserOps.td
@@ -4599,10 +4599,11 @@ def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterf
 
 def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
   let input = (ins
-    OneFlow_Tensor:$x
+    OneFlow_Tensor:$x,
+    OneFlow_Tensor:$dy
   );
   let output = (outs
-    OneFlow_Tensor:$y
+    OneFlow_Tensor:$dx
   );
   let has_logical_tensor_desc_infer_fn = 1;
   let has_physical_tensor_desc_infer_fn = 1;
diff --git a/oneflow/user/ops/math_unary_elementwise_seq.h b/oneflow/user/ops/math_unary_elementwise_seq.h
index 43bacc19477..9f518203095 100644
--- a/oneflow/user/ops/math_unary_elementwise_seq.h
+++ b/oneflow/user/ops/math_unary_elementwise_seq.h
@@ -76,7 +76,7 @@ namespace oneflow {
   OF_PP_MAKE_TUPLE_SEQ("expm1", Expm1)                          \
   OF_PP_MAKE_TUPLE_SEQ("log", Log)                              \
   OF_PP_MAKE_TUPLE_SEQ("lgamma", Lgamma)                        \
-  OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma)                        \
+  OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma)                      \
   OF_PP_MAKE_TUPLE_SEQ("log2", Log2)                            \
   OF_PP_MAKE_TUPLE_SEQ("log10", Log10)                          \
   OF_PP_MAKE_TUPLE_SEQ("log1p", Log1p)                          \
diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py
index 0d7017f8b65..809bd9bd858 100644
--- a/python/oneflow/special/special_ops.py
+++ b/python/oneflow/special/special_ops.py
@@ -60,5 +60,6 @@ def round(x: Tensor):
 def softmax(x: Tensor, dim: int):
     return oneflow._C.softmax(x, dim)
 
-def digamma(x:Tensor):
-    return oneflow._C.digamma(x)
\ No newline at end of file
+
+def digamma(x: Tensor):
+    return oneflow._C.digamma(x)
diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py
index 7db6a09a60f..fedc1f1145e 100644
--- a/python/oneflow/test/modules/test_special_ops.py
+++ b/python/oneflow/test/modules/test_special_ops.py
@@ -112,12 +112,12 @@ def test_flow_logsumexp_with_random_data(test_case):
         x = random_tensor(4, random(0, 5), 2).to(device)
         y = torch.special.logsumexp(x, dim=np.random.randint(0, 3))
         return y
-    
+
     # TODO:shijiaxing  When the grad function be implemented, set "auto_backward=auto"
     @autotest(n=5, auto_backward=False)
     def test_flow_digamma_with_random_data(test_case):
         device = random_device()
-        x_dtype = random_dtype(["arithmetic"])
+        x_dtype = random_dtype(["arithmetic","half"])
         x = random_tensor().to(device).to(x_dtype)
         y = torch.special.digamma(x)
 

From b1fe15bd829c6931770ef024f82a2491748abaeb Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 07:53:12 +0000
Subject: [PATCH 04/21] tensor.digamma  api

---
 oneflow/api/python/framework/tensor_functions.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/oneflow/api/python/framework/tensor_functions.cpp b/oneflow/api/python/framework/tensor_functions.cpp
index 12a0107b6c2..4f2f0a04de0 100644
--- a/oneflow/api/python/framework/tensor_functions.cpp
+++ b/oneflow/api/python/framework/tensor_functions.cpp
@@ -200,6 +200,7 @@ PyNumberMethods PyTensorObject_as_number = {
   }
 
 UNARY_METHOD(PyTensorObject_abs, functional::Abs);
+UNARY_METHOD(PyTensorObject_digamma, functional::Digamma);
 UNARY_METHOD(PyTensorObject_exp, functional::Exp);
 UNARY_METHOD(PyTensorObject_exp2, functional::Exp2);
 UNARY_METHOD(PyTensorObject_floor, functional::Floor);
@@ -788,9 +789,7 @@ static PyObject* PyTensorObject_to_global(PyObject* self, PyObject* args, PyObje
   PyObject* result = NULL;
   if (tensor->is_global())
     result = PyTensorObject_global_to_global(self, args, kwargs);
-  else {
-    result = PyTensorObject_local_to_global(self, args, kwargs);
-  }
+  else { result = PyTensorObject_local_to_global(self, args, kwargs); }
   if (PyErr_Occurred()) { throw py::error_already_set(); }
   return result;
 
@@ -1098,6 +1097,7 @@ PyMethodDef PyTensorObject_extra_methods[] = {
 
     // macro UNARY_METHOD
     {"abs", PyTensorObject_abs, METH_NOARGS, NULL},
+    {"digamma", PyTensorObject_digamma, METH_NOARGS, NULL},
     {"exp", PyTensorObject_exp, METH_NOARGS, NULL},
     {"exp2", PyTensorObject_exp2, METH_NOARGS, NULL},
     {"floor", PyTensorObject_floor, METH_NOARGS, NULL},

From 0e920827a3b46244e40010dd66e9812b5d717358 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 07:54:32 +0000
Subject: [PATCH 05/21] flow.digamma  api

---
 python/oneflow/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/oneflow/__init__.py b/python/oneflow/__init__.py
index 05f05e307d1..a68ccfa342a 100644
--- a/python/oneflow/__init__.py
+++ b/python/oneflow/__init__.py
@@ -103,6 +103,7 @@ def use_deterministic_algorithms(mode, *, warn_only=False):
 from oneflow._C import baddbmm
 from oneflow._C import broadcast_like
 from oneflow._C import chunk
+from oneflow._C import digamma
 from oneflow._C import split
 from oneflow._C import sign
 from oneflow._C import sinh

From a08eada3784001ec4aeb93fe46f114a3c7f5e0dd Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 08:00:03 +0000
Subject: [PATCH 06/21] fix test

---
 python/oneflow/test/modules/test_special_ops.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py
index fedc1f1145e..22513ad060d 100644
--- a/python/oneflow/test/modules/test_special_ops.py
+++ b/python/oneflow/test/modules/test_special_ops.py
@@ -120,6 +120,7 @@ def test_flow_digamma_with_random_data(test_case):
         x_dtype = random_dtype(["arithmetic","half"])
         x = random_tensor().to(device).to(x_dtype)
         y = torch.special.digamma(x)
+        return y
 
 
 if __name__ == "__main__":

From b60259f3c743deca3616f979b15a74a53a1e6472 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 08:28:40 +0000
Subject: [PATCH 07/21] unittest

---
 oneflow/core/ep/cuda/primitive/unary_functor.cuh  |  8 +-------
 .../oneflow/test/modules/test_global_math_ops.py  |  8 ++++++++
 python/oneflow/test/modules/test_math_ops.py      |  8 ++++++++
 python/oneflow/test/modules/test_special_ops.py   |  4 ++--
 .../test/tensor/test_bfloat16_activation.py       | 15 +++++++++++++++
 python/oneflow/test/tensor/test_tensor_part_2.py  |  7 +++++++
 6 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index 3b29695632e..74527e363f6 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -233,13 +233,6 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
   }
 };
 
-template<>
-struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, half, half> {
-  OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
-
-  OF_DEVICE_FUNC half operator()(half src) const { return calc_digamma_cuda<half, float>(src); }
-};
-
 
 template<>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kAbs, half, half> {
@@ -369,6 +362,7 @@ SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kAtanh);
 SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCeil);
 SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCos);
 SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCosh);
+SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kDigamma);
 SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kErf);
 SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kErfc);
 SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kExp);
diff --git a/python/oneflow/test/modules/test_global_math_ops.py b/python/oneflow/test/modules/test_global_math_ops.py
index 624c60a398a..c4f7778059c 100644
--- a/python/oneflow/test/modules/test_global_math_ops.py
+++ b/python/oneflow/test/modules/test_global_math_ops.py
@@ -171,6 +171,13 @@ def _test_atan2(test_case, placement, sbp, ndim):
     z = torch.atan2(x, y)
     return z
 
+# TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
+@autotest(n=1, auto_backward=False)
+def _test_digamma(test_case, placement, sbp, ndim):
+    dim_list = [random(1, 3).to(int).value() * 8 for _ in range(ndim)]
+    x = random_tensor(ndim, *dim_list, low=0, high=10).to_global(placement, sbp)
+    y = torch.digamma(x)
+    return y
 
 class TestMathOps(flow.unittest.TestCase):
     @globaltest
@@ -194,6 +201,7 @@ def test_math_ops(test_case):
                 _test_acos(test_case, placement, sbp, ndim)
                 _test_arccosh(test_case, placement, sbp, ndim)
                 _test_acosh(test_case, placement, sbp, ndim)
+                _test_digamma(test_case, placement, sbp, ndim)
 
                 _test_floordiv(test_case, placement, sbp, ndim)
                 _test_atan2(test_case, placement, sbp, ndim)
diff --git a/python/oneflow/test/modules/test_math_ops.py b/python/oneflow/test/modules/test_math_ops.py
index 6463d0b3b0e..2355c233b0c 100644
--- a/python/oneflow/test/modules/test_math_ops.py
+++ b/python/oneflow/test/modules/test_math_ops.py
@@ -608,6 +608,14 @@ def test_log10_with_random_data(test_case):
         x = random_tensor().to(device)
         return torch.log10(x)
 
+@flow.unittest.skip_unless_1n1d()
+class TestDigammaModule(flow.unittest.TestCase):
+    # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
+    @autotest(n=5, auto_backward=False)
+    def test_digamma_with_random_data(test_case):
+        device = random_device()
+        x = random_tensor().to(device)
+        return torch.digamma(x)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py
index 22513ad060d..7f82a609830 100644
--- a/python/oneflow/test/modules/test_special_ops.py
+++ b/python/oneflow/test/modules/test_special_ops.py
@@ -113,11 +113,11 @@ def test_flow_logsumexp_with_random_data(test_case):
         y = torch.special.logsumexp(x, dim=np.random.randint(0, 3))
         return y
 
-    # TODO:shijiaxing  When the grad function be implemented, set "auto_backward=auto"
+ # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
     @autotest(n=5, auto_backward=False)
     def test_flow_digamma_with_random_data(test_case):
         device = random_device()
-        x_dtype = random_dtype(["arithmetic","half"])
+        x_dtype = random_dtype(["arithmetic","half","bfloat16"])
         x = random_tensor().to(device).to(x_dtype)
         y = torch.special.digamma(x)
         return y
diff --git a/python/oneflow/test/tensor/test_bfloat16_activation.py b/python/oneflow/test/tensor/test_bfloat16_activation.py
index 0378d5bea3d..2b888cc31e3 100644
--- a/python/oneflow/test/tensor/test_bfloat16_activation.py
+++ b/python/oneflow/test/tensor/test_bfloat16_activation.py
@@ -690,6 +690,21 @@ def test_logsinmoid_with_random_data(test_case):
                 rtol=1e-4,
             )
         )
+    
+    def test_digamma_with_random_data(test_case):
+        np_array = np.random.rand(4, 4)
+        x = flow.tensor(np_array, dtype=flow.bfloat16, device="cpu")
+        fp32_x = x.float()
+        y = flow.digamma(x)
+        fp32_y = flow.digamma(fp32_x)
+        test_case.assertTrue(
+            np.allclose(
+                y.float().numpy(),
+                fp32_y.bfloat16().float().numpy(),
+                atol=1e-4,
+                rtol=1e-4,
+            )
+        )
 
 
 if __name__ == "__main__":
diff --git a/python/oneflow/test/tensor/test_tensor_part_2.py b/python/oneflow/test/tensor/test_tensor_part_2.py
index 546bdcb2597..7b4a4b07092 100644
--- a/python/oneflow/test/tensor/test_tensor_part_2.py
+++ b/python/oneflow/test/tensor/test_tensor_part_2.py
@@ -935,6 +935,13 @@ def test_construct_global_tensor_by_numpy(test_case):
             x, placement=placement, sbp=[flow.sbp.split(0)], requires_grad=False,
         )
         test_case.assertTrue(y_default_dtype.dtype == flow.int32)
+        
+    # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
+    @autotest(n=5,auto_backward=False)
+    def test_digamma_tensor_with_random_data(test_case):
+        device = random_device()
+        x = random_tensor().to(device)
+        return x.digamma()
 
 
 @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")

From 2e0248e233e8d799f54431ac9988ffc96b796864 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 08:32:03 +0000
Subject: [PATCH 08/21] fmt

---
 .../primitive/broadcast_elementwise_binary.h  | 20 +++++++++----------
 .../core/ep/cuda/primitive/unary_functor.cuh  |  1 -
 .../test/modules/test_global_math_ops.py      |  2 ++
 python/oneflow/test/modules/test_math_ops.py  |  2 ++
 .../oneflow/test/modules/test_special_ops.py  |  4 ++--
 .../test/tensor/test_bfloat16_activation.py   |  2 +-
 .../oneflow/test/tensor/test_tensor_part_2.py |  4 ++--
 7 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
index 9aad1dc068f..2b4ad33a9d1 100644
--- a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
+++ b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h
@@ -131,16 +131,16 @@ inline bool IsDimsEquals(size_t num_src0_dims, const int64_t* src0_dims, size_t
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kAtanhBackwardWithDyX) \
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCosBackwardWithDyX)
 
-#define BINARY_MATH_BACKWARD_OP_SEQ_1                    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX)   \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX)    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX)   \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX)    \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX)   \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX)  \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX)\
-  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX)    \
+#define BINARY_MATH_BACKWARD_OP_SEQ_1                     \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX)     \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX)     \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX)    \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX)   \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX)  \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX) \
+  OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX)     \
   OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLog2BackwardWithDyX)
 
 #define BINARY_MATH_BACKWARD_OP_SEQ_2                             \
diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index 74527e363f6..5dcbefac4c5 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -233,7 +233,6 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
   }
 };
 
-
 template<>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kAbs, half, half> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
diff --git a/python/oneflow/test/modules/test_global_math_ops.py b/python/oneflow/test/modules/test_global_math_ops.py
index c4f7778059c..dbd46a27948 100644
--- a/python/oneflow/test/modules/test_global_math_ops.py
+++ b/python/oneflow/test/modules/test_global_math_ops.py
@@ -171,6 +171,7 @@ def _test_atan2(test_case, placement, sbp, ndim):
     z = torch.atan2(x, y)
     return z
 
+
 # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
 @autotest(n=1, auto_backward=False)
 def _test_digamma(test_case, placement, sbp, ndim):
@@ -179,6 +180,7 @@ def _test_digamma(test_case, placement, sbp, ndim):
     y = torch.digamma(x)
     return y
 
+
 class TestMathOps(flow.unittest.TestCase):
     @globaltest
     def test_math_ops(test_case):
diff --git a/python/oneflow/test/modules/test_math_ops.py b/python/oneflow/test/modules/test_math_ops.py
index 2355c233b0c..5c27662cc56 100644
--- a/python/oneflow/test/modules/test_math_ops.py
+++ b/python/oneflow/test/modules/test_math_ops.py
@@ -608,6 +608,7 @@ def test_log10_with_random_data(test_case):
         x = random_tensor().to(device)
         return torch.log10(x)
 
+
 @flow.unittest.skip_unless_1n1d()
 class TestDigammaModule(flow.unittest.TestCase):
     # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
@@ -617,5 +618,6 @@ def test_digamma_with_random_data(test_case):
         x = random_tensor().to(device)
         return torch.digamma(x)
 
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py
index 7f82a609830..f76b5b63997 100644
--- a/python/oneflow/test/modules/test_special_ops.py
+++ b/python/oneflow/test/modules/test_special_ops.py
@@ -113,11 +113,11 @@ def test_flow_logsumexp_with_random_data(test_case):
         y = torch.special.logsumexp(x, dim=np.random.randint(0, 3))
         return y
 
- # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
+    # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
     @autotest(n=5, auto_backward=False)
     def test_flow_digamma_with_random_data(test_case):
         device = random_device()
-        x_dtype = random_dtype(["arithmetic","half","bfloat16"])
+        x_dtype = random_dtype(["arithmetic", "half", "bfloat16"])
         x = random_tensor().to(device).to(x_dtype)
         y = torch.special.digamma(x)
         return y
diff --git a/python/oneflow/test/tensor/test_bfloat16_activation.py b/python/oneflow/test/tensor/test_bfloat16_activation.py
index 2b888cc31e3..642994e020d 100644
--- a/python/oneflow/test/tensor/test_bfloat16_activation.py
+++ b/python/oneflow/test/tensor/test_bfloat16_activation.py
@@ -690,7 +690,7 @@ def test_logsinmoid_with_random_data(test_case):
                 rtol=1e-4,
             )
         )
-    
+
     def test_digamma_with_random_data(test_case):
         np_array = np.random.rand(4, 4)
         x = flow.tensor(np_array, dtype=flow.bfloat16, device="cpu")
diff --git a/python/oneflow/test/tensor/test_tensor_part_2.py b/python/oneflow/test/tensor/test_tensor_part_2.py
index 7b4a4b07092..82bfa7e7276 100644
--- a/python/oneflow/test/tensor/test_tensor_part_2.py
+++ b/python/oneflow/test/tensor/test_tensor_part_2.py
@@ -935,9 +935,9 @@ def test_construct_global_tensor_by_numpy(test_case):
             x, placement=placement, sbp=[flow.sbp.split(0)], requires_grad=False,
         )
         test_case.assertTrue(y_default_dtype.dtype == flow.int32)
-        
+
     # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
-    @autotest(n=5,auto_backward=False)
+    @autotest(n=5, auto_backward=False)
     def test_digamma_tensor_with_random_data(test_case):
         device = random_device()
         x = random_tensor().to(device)

From 98ebc8be04417ba927b1586e74aa612a51b8d9c1 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 08:36:13 +0000
Subject: [PATCH 09/21] auto  fmt

---
 oneflow/api/python/framework/tensor_functions.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/oneflow/api/python/framework/tensor_functions.cpp b/oneflow/api/python/framework/tensor_functions.cpp
index 1e493d02db8..9f064516991 100644
--- a/oneflow/api/python/framework/tensor_functions.cpp
+++ b/oneflow/api/python/framework/tensor_functions.cpp
@@ -791,7 +791,9 @@ static PyObject* PyTensorObject_to_global(PyObject* self, PyObject* args, PyObje
   PyObject* result = NULL;
   if (tensor->is_global())
     result = PyTensorObject_global_to_global(self, args, kwargs);
-  else { result = PyTensorObject_local_to_global(self, args, kwargs); }
+  else {
+    result = PyTensorObject_local_to_global(self, args, kwargs);
+  }
   if (PyErr_Occurred()) { throw py::error_already_set(); }
   return result;
 

From d8e5b0f78ff8d08ff0e8303e455015df758bae86 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 09:12:55 +0000
Subject: [PATCH 10/21] add api  psi

---
 python/oneflow/special/__init__.py              |  1 +
 python/oneflow/special/special_ops.py           |  3 +++
 python/oneflow/test/modules/test_special_ops.py | 11 ++++++++++-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/python/oneflow/special/__init__.py b/python/oneflow/special/__init__.py
index dd3c369e3bc..940897460b4 100644
--- a/python/oneflow/special/__init__.py
+++ b/python/oneflow/special/__init__.py
@@ -26,3 +26,4 @@
 from .special_ops import round
 from .special_ops import softmax
 from .special_ops import digamma
+from .special_ops import psi
diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py
index 809bd9bd858..4870e24c591 100644
--- a/python/oneflow/special/special_ops.py
+++ b/python/oneflow/special/special_ops.py
@@ -63,3 +63,6 @@ def softmax(x: Tensor, dim: int):
 
 def digamma(x: Tensor):
     return oneflow._C.digamma(x)
+
+def psi(x: Tensor):
+    return oneflow._C.digamma(x)
\ No newline at end of file
diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py
index f76b5b63997..210c9be73e6 100644
--- a/python/oneflow/test/modules/test_special_ops.py
+++ b/python/oneflow/test/modules/test_special_ops.py
@@ -117,10 +117,19 @@ def test_flow_logsumexp_with_random_data(test_case):
     @autotest(n=5, auto_backward=False)
     def test_flow_digamma_with_random_data(test_case):
         device = random_device()
-        x_dtype = random_dtype(["arithmetic", "half", "bfloat16"])
+        x_dtype = random_dtype(["arithmetic", "half"])
         x = random_tensor().to(device).to(x_dtype)
         y = torch.special.digamma(x)
         return y
+    
+    # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
+    @autotest(n=5, auto_backward=False)
+    def test_flow_psi_with_random_data(test_case):
+        device = random_device()
+        x_dtype = random_dtype(["arithmetic", "half"])
+        x = random_tensor().to(device).to(x_dtype)
+        y = torch.special.psi(x)
+        return y
 
 
 if __name__ == "__main__":

From 98cf0a7f72d7fcb3d16957fd00ba0e6734eb7a6b Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 09:13:08 +0000
Subject: [PATCH 11/21] docstr

---
 docs/source/special.rst                       |  1 +
 docs/source/tensor.rst                        |  1 +
 python/oneflow/framework/docstr/math_ops.py   | 21 +++++++++++++++++++
 .../oneflow/framework/docstr/special_ops.py   | 14 +++++++++++++
 python/oneflow/framework/docstr/tensor.py     |  7 +++++++
 5 files changed, 44 insertions(+)

diff --git a/docs/source/special.rst b/docs/source/special.rst
index 810a818ac9b..459a60ce8e7 100644
--- a/docs/source/special.rst
+++ b/docs/source/special.rst
@@ -8,6 +8,7 @@ The oneflow.special module, modeled after SciPy's special module.
     :toctree: generated
     :nosignatures:
 
+    digamma
     erf
     erfc
     erfinv
diff --git a/docs/source/tensor.rst b/docs/source/tensor.rst
index 9684082b444..7fa220e8d03 100644
--- a/docs/source/tensor.rst
+++ b/docs/source/tensor.rst
@@ -225,6 +225,7 @@ Tensor class reference
     Tensor.div_
     Tensor.double
     Tensor.dtype 
+    Tensor.digamma
     Tensor.element_size
     Tensor.eq
     Tensor.equal
diff --git a/python/oneflow/framework/docstr/math_ops.py b/python/oneflow/framework/docstr/math_ops.py
index d7a39e2f1b3..9717c01a773 100644
--- a/python/oneflow/framework/docstr/math_ops.py
+++ b/python/oneflow/framework/docstr/math_ops.py
@@ -1972,3 +1972,24 @@
         tensor([3., 0., -0., -0.], dtype=oneflow.float32)
     """,
 )
+
+add_docstr(
+    oneflow.digamma,
+    r"""digamma(input) -> Tensor
+
+    .. math::
+        \digamma(x) = \frac{d}{dx} \ln\left(\Gamma\left(x\right)\right) = \frac{\Gamma'(x)}{\Gamma(x)}
+
+    Args:
+        input (Tensor): the tensor to compute the digamma function on
+        
+    .. note::  This function is similar to SciPy's `scipy.special.digamma`.
+
+    Example::
+
+        >>> import oneflow as flow
+        >>> a = flow.tensor([1, 0.5, 0, -2.1])
+        >>> flow.digamma(a)
+        tensor([-5.7722e-01, -1.9635e+00,        -inf,  1.0630e+01], dtype=oneflow.float32)
+    """,
+)
diff --git a/python/oneflow/framework/docstr/special_ops.py b/python/oneflow/framework/docstr/special_ops.py
index 7807312ab0c..9f9336458d3 100644
--- a/python/oneflow/framework/docstr/special_ops.py
+++ b/python/oneflow/framework/docstr/special_ops.py
@@ -16,6 +16,13 @@
 import oneflow
 from oneflow.framework.docstr.utils import add_docstr
 
+add_docstr(
+    oneflow.special.digamma,
+    """
+    Alias for :func:`oneflow.digamma`. 
+    """,
+)
+
 add_docstr(
     oneflow.special.erf,
     """
@@ -85,3 +92,10 @@
     Alias for :func:`oneflow.softmax`. 
     """,
 )
+
+add_docstr(
+    oneflow.special.psi,
+    """
+    Alias for :func:`oneflow.special.digamma`. 
+    """,
+)
\ No newline at end of file
diff --git a/python/oneflow/framework/docstr/tensor.py b/python/oneflow/framework/docstr/tensor.py
index 34c657d99a7..c4f58577f48 100644
--- a/python/oneflow/framework/docstr/tensor.py
+++ b/python/oneflow/framework/docstr/tensor.py
@@ -2770,3 +2770,10 @@
     In-place version of :func:`oneflow.Tensor.frac`.
     """,
 )
+
+add_docstr(
+    oneflow.Tensor.digamma,
+    """
+    See :func:`oneflow.digamma`
+    """,
+)
\ No newline at end of file

From b0702e2011c337514f6b03dc11aaee05aa8dbcc2 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 10:17:08 +0000
Subject: [PATCH 12/21] fmt

---
 python/oneflow/framework/docstr/special_ops.py  | 2 +-
 python/oneflow/framework/docstr/tensor.py       | 2 +-
 python/oneflow/special/special_ops.py           | 3 ++-
 python/oneflow/test/modules/test_special_ops.py | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/oneflow/framework/docstr/special_ops.py b/python/oneflow/framework/docstr/special_ops.py
index 9f9336458d3..1cbdba38e20 100644
--- a/python/oneflow/framework/docstr/special_ops.py
+++ b/python/oneflow/framework/docstr/special_ops.py
@@ -98,4 +98,4 @@
     """
     Alias for :func:`oneflow.special.digamma`. 
     """,
-)
\ No newline at end of file
+)
diff --git a/python/oneflow/framework/docstr/tensor.py b/python/oneflow/framework/docstr/tensor.py
index c4f58577f48..82d001aec74 100644
--- a/python/oneflow/framework/docstr/tensor.py
+++ b/python/oneflow/framework/docstr/tensor.py
@@ -2776,4 +2776,4 @@
     """
     See :func:`oneflow.digamma`
     """,
-)
\ No newline at end of file
+)
diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py
index 4870e24c591..4224324ec1c 100644
--- a/python/oneflow/special/special_ops.py
+++ b/python/oneflow/special/special_ops.py
@@ -64,5 +64,6 @@ def softmax(x: Tensor, dim: int):
 def digamma(x: Tensor):
     return oneflow._C.digamma(x)
 
+
 def psi(x: Tensor):
-    return oneflow._C.digamma(x)
\ No newline at end of file
+    return oneflow._C.digamma(x)
diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py
index 210c9be73e6..84b5fd50c1f 100644
--- a/python/oneflow/test/modules/test_special_ops.py
+++ b/python/oneflow/test/modules/test_special_ops.py
@@ -121,7 +121,7 @@ def test_flow_digamma_with_random_data(test_case):
         x = random_tensor().to(device).to(x_dtype)
         y = torch.special.digamma(x)
         return y
-    
+
     # TODO:shijiaxing  When the grad function be implemented, rm "auto_backward=False"
     @autotest(n=5, auto_backward=False)
     def test_flow_psi_with_random_data(test_case):

From c1759ec0dba46d0754a1d511eb2b5a9628a9daa7 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sun, 2 Apr 2023 11:46:26 +0000
Subject: [PATCH 13/21] fix  docstr

---
 python/oneflow/framework/docstr/math_ops.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/oneflow/framework/docstr/math_ops.py b/python/oneflow/framework/docstr/math_ops.py
index 9717c01a773..7160f21dc99 100644
--- a/python/oneflow/framework/docstr/math_ops.py
+++ b/python/oneflow/framework/docstr/math_ops.py
@@ -1988,8 +1988,9 @@
     Example::
 
         >>> import oneflow as flow
-        >>> a = flow.tensor([1, 0.5, 0, -2.1])
+        >>> a = flow.tensor([1, 0.5])
         >>> flow.digamma(a)
-        tensor([-5.7722e-01, -1.9635e+00,        -inf,  1.0630e+01], dtype=oneflow.float32)
+        tensor([-0.5772, -1.9635], dtype=oneflow.float32)
+        
     """,
 )

From 4b41d24b872edc4c9bd5b6096c18d860705cc300 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Mon, 3 Apr 2023 04:45:36 +0000
Subject: [PATCH 14/21] refine

---
 oneflow/core/common/math_util.cpp             | 111 ---------------
 oneflow/core/common/math_util.h               |  59 --------
 .../core/ep/cpu/primitive/binary_functor.h    |   2 +-
 oneflow/core/ep/cpu/primitive/unary_functor.h | 128 +++++++++++++++++-
 .../core/ep/cuda/primitive/binary_functor.cuh |   2 +-
 .../core/ep/cuda/primitive/unary_functor.cuh  |  53 +++++++-
 6 files changed, 178 insertions(+), 177 deletions(-)

diff --git a/oneflow/core/common/math_util.cpp b/oneflow/core/common/math_util.cpp
index f5ce19d4491..694c8bf1e3b 100644
--- a/oneflow/core/common/math_util.cpp
+++ b/oneflow/core/common/math_util.cpp
@@ -29,116 +29,5 @@ int64_t Gcd(int64_t m, int64_t n) {
 
 int64_t Lcm(int64_t m, int64_t n) { return m * n / Gcd(m, n); }
 
-template<typename T>
-T polevl(const T x, const T A[], size_t len) {
-  T result = 0;
-  for (size_t i = 0; i <= len; i++) { result = result * x + A[i]; }
-  return result;
-}
-
-/*
- * This function is derived from the implementation of the digamma function in the Cephes Math
- * Library. See note [3-Clause BSD License for the Cephes Math Library].
- */
-
-double calc_digamma_cpu(double x) {
-  static double PSI_10 = 2.25175258906672110764;
-  if (x == 0) {
-    // As per C++ standard for gamma related functions and SciPy,
-    // If the argument is ±0, ±∞ is returned
-    return std::copysign(INFINITY, -x);
-  }
-
-  bool x_is_integer = x == trunc(x);
-  if (x < 0) {
-    if (x_is_integer) {
-      // As per C++ standard for gamma related functions and SciPy,
-      // If the argument is a negative integer, NaN is returned
-      return std::numeric_limits<double>::quiet_NaN();
-    }
-    // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
-    // accurate than tan(pi * x). While these operations are mathematically equivalent
-    // since both x and r are in radians and tan() has a periodicity of pi, in practice
-    // the computation of pi * x is a source of error (when |x| > 1).
-    double q, r;
-    r = std::modf(x, &q);
-    return calc_digamma_cpu(1 - x) - pi<double> / tan(pi<double> * r);
-  }
-
-  // Push x to be >= 10
-  double result = 0;
-  while (x < 10) {
-    result -= 1 / x;
-    x += 1;
-  }
-  if (x == 10) { return result + PSI_10; }
-
-  // Compute asymptotic digamma
-  static const double A[] = {
-      8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
-      -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
-      8.33333333333333333333E-2,
-  };
-
-  double y = 0;
-  if (x < 1.0e17) {
-    double z = 1.0 / (x * x);
-    y = z * polevl(z, A, 6);
-  }
-  return result + log(x) - (0.5 / x) - y;
-}
-
-/*
- * This function is derived from the implementation of the digamma function in the Cephes Math
- * Library. See note [3-Clause BSD License for the Cephes Math Library].
- */
-
-float calc_digamma_cpu(float x) {
-  static float PSI_10 = 2.25175258906672110764f;
-  if (x == 0) {
-    // As per C++ standard for gamma related functions and SciPy,
-    // If the argument is ±0, ±∞ is returned
-    return std::copysign(INFINITY, -x);
-  }
-
-  bool x_is_integer = x == truncf(x);
-  if (x < 0) {
-    if (x_is_integer) {
-      // As per C++ standard for gamma related functions and SciPy,
-      // If the argument is a negative integer, NaN is returned
-      return std::numeric_limits<float>::quiet_NaN();
-    }
-    // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
-    // accurate than tan(pi * x). While these operations are mathematically equivalent
-    // since both x and r are in radians and tan() has a periodicity of pi, in practice
-    // the computation of pi * x is a source of error (when |x| > 1).
-    double q, r;
-    r = std::modf(x, &q);
-    float pi_over_tan_pi_x = (float)(pi<double> / tan(pi<double> * r));
-    return calc_digamma_cpu(1 - x) - pi_over_tan_pi_x;
-  }
-
-  // Push x to be >= 10
-  float result = 0;
-  while (x < 10) {
-    result -= 1 / x;
-    x += 1;
-  }
-  if (x == 10) { return result + PSI_10; }
-
-  // Compute asymptotic digamma
-  static const float A[] = {
-      8.33333333333333333333E-2f,  -2.10927960927960927961E-2f, 7.57575757575757575758E-3f,
-      -4.16666666666666666667E-3f, 3.96825396825396825397E-3f,  -8.33333333333333333333E-3f,
-      8.33333333333333333333E-2f,
-  };
-
-  float y = 0;
-  if (x < 1.0e17f) {
-    float z = 1 / (x * x);
-    y = z * polevl(z, A, 6);
-  }
-  return result + logf(x) - (0.5f / x) - y;
-}
 
 }  // namespace oneflow
diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h
index 4d8461a15e8..4086fe56607 100644
--- a/oneflow/core/common/math_util.h
+++ b/oneflow/core/common/math_util.h
@@ -31,66 +31,7 @@ int64_t Gcd(int64_t m, int64_t n);
 
 int64_t Lcm(int64_t m, int64_t n);
 
-template<typename T>
-T polevl(const T x, const T A[], size_t len);
-
-// This function references pytorch/aten/src/ATen/native/Math.h
-double calc_digamma_cpu(double x);
-
-float calc_digamma_cpu(float x);
-
-template<typename scalar_t, typename accscalar_t>
-OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) {
-  static const double PI_f64 = 3.14159265358979323846;
-  const accscalar_t PSI_10 = 2.25175258906672110764;
-  const accscalar_t A[] = {
-      8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
-      -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
-      8.33333333333333333333E-2,
-  };
-
-  accscalar_t x = static_cast<accscalar_t>(in);
-  if (x == static_cast<accscalar_t>(0)) {
-    // As per C++ standard for gamma related functions and SciPy,
-    // If the argument is ±0, ±∞ is returned
-    return std::copysign(static_cast<scalar_t>(INFINITY), -x);
-  }
 
-  bool x_is_integer = x == trunc(x);
-  accscalar_t result = static_cast<accscalar_t>(0);
-  if (x < 0) {
-    if (x_is_integer) {
-      // As per C++ standard for gamma related functions and SciPy,
-      // If the argument is a negative integer, NaN is returned
-      return static_cast<scalar_t>(NAN);
-    }
-    // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
-    // accurate than tan(pi * x). While these operations are mathematically equivalent
-    // since both x and r are in radians and tan() has a periodicity of pi, in practice
-    // the computation of pi * x is a source of error (when |x| > 1).
-    double q, r;
-    r = modf(static_cast<double>(x), &q);
-    result = static_cast<accscalar_t>(-PI_f64 / tan(PI_f64 * r));
-    x = static_cast<accscalar_t>(1) - x;
-  }
-
-  while (x < 10) {
-    result -= static_cast<accscalar_t>(1) / x;
-    x += 1;
-  }
-  if (x == static_cast<accscalar_t>(10)) { return static_cast<scalar_t>(result + PSI_10); }
-
-  accscalar_t y = 0;
-  if (x < 1.0e17) {
-    accscalar_t z = static_cast<accscalar_t>(1) / (x * x);
-
-    accscalar_t polevl_result = 0;
-    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
-    y = z * polevl_result;
-  }
-
-  return static_cast<scalar_t>(log(x) - (static_cast<accscalar_t>(0.5) / x) - y + result);
-}
 
 template<typename T>
 OF_DEVICE_FUNC T DeviceMin(T a, T b) {
diff --git a/oneflow/core/ep/cpu/primitive/binary_functor.h b/oneflow/core/ep/cpu/primitive/binary_functor.h
index 2c748c58968..a7d14c9deeb 100644
--- a/oneflow/core/ep/cpu/primitive/binary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/binary_functor.h
@@ -358,7 +358,7 @@ struct BinaryFunctor<DeviceType::kCPU, BinaryOp::kDigammaBackwardWithDyX, Src, D
   OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
   OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
     // TODO:shijiaxing： This function is named trigamma, it will be implemented soon.
-    assert(false);
+   UNIMPLEMENTED();
     return 0;
   }
 };
diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h
index 170d1eafa9d..d1f6b18d1fd 100644
--- a/oneflow/core/ep/cpu/primitive/unary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/unary_functor.h
@@ -121,11 +121,133 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kRsqrt, Dst, Src> {
   }
 };
 
-template<typename Dst, typename Src>
-struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, Dst, Src> {
+template<>
+struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, float, float> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
-  OF_DEVICE_FUNC Dst operator()(Src src) const { return static_cast<Dst>(calc_digamma_cpu(src)); }
+  OF_DEVICE_FUNC float operator()(float src) const {
+    const auto& calc_digamma = [](float x) {
+      std::function<float(float)> compute;
+      compute = [&](float x) {
+        static float PSI_10 = 2.25175258906672110764f;
+        if (x == 0) {
+          // As per C++ standard for gamma related functions and SciPy,
+          // If the argument is ±0, ±∞ is returned
+          return std::copysign(INFINITY, -x);
+        }
+
+        bool x_is_integer = x == truncf(x);
+        if (x < 0) {
+          if (x_is_integer) {
+            // As per C++ standard for gamma related functions and SciPy,
+            // If the argument is a negative integer, NaN is returned
+            return std::numeric_limits<float>::quiet_NaN();
+          }
+          // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
+          // accurate than tan(pi * x). While these operations are mathematically equivalent
+          // since both x and r are in radians and tan() has a periodicity of pi, in practice
+          // the computation of pi * x is a source of error (when |x| > 1).
+          double q, r;
+          r = std::modf(x, &q);
+          float pi_over_tan_pi_x = (float)(pi<double> / tan(pi<double> * r));
+          return compute(1 - x) - pi_over_tan_pi_x;
+        }
+
+        // Push x to be >= 10
+        float result = 0;
+        while (x < 10) {
+          result -= 1 / x;
+          x += 1;
+        }
+        if (x == 10) { return result + PSI_10; }
+
+        // Compute asymptotic digamma
+        static const float A[] = {
+            8.33333333333333333333E-2f,  -2.10927960927960927961E-2f, 7.57575757575757575758E-3f,
+            -4.16666666666666666667E-3f, 3.96825396825396825397E-3f,  -8.33333333333333333333E-3f,
+            8.33333333333333333333E-2f,
+        };
+
+        float y = 0;
+        if (x < 1.0e17f) {
+          float z = 1 / (x * x);
+         float polevl_result = 0;
+    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
+    y = z * polevl_result;
+        }
+        return result + logf(x) - (0.5f / x) - y;
+      };
+
+      return compute(x);
+    };
+
+    return calc_digamma(src);
+  }
+};
+
+template<>
+struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, double, double> {
+  OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
+
+  OF_DEVICE_FUNC double operator()(double src) const {
+    const auto& calc_digamma = [](double x) {
+      std::function<double(double)> compute;
+      compute = [&](double x) {
+        static double PSI_10 = 2.25175258906672110764;
+        if (x == 0) {
+          // As per C++ standard for gamma related functions and SciPy,
+          // If the argument is ±0, ±∞ is returned
+          return std::copysign(INFINITY, -x);
+        }
+
+        bool x_is_integer = x == trunc(x);
+        if (x < 0) {
+          if (x_is_integer) {
+            // As per C++ standard for gamma related functions and SciPy,
+            // If the argument is a negative integer, NaN is returned
+            return std::numeric_limits<double>::quiet_NaN();
+          }
+          // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
+          // accurate than tan(pi * x). While these operations are mathematically equivalent
+          // since both x and r are in radians and tan() has a periodicity of pi, in practice
+          // the computation of pi * x is a source of error (when |x| > 1).
+          double q, r;
+          r = std::modf(x, &q);
+          return compute(1 - x) - pi<double> / tan(pi<double> * r);
+        }
+
+        // Push x to be >= 10
+        double result = 0;
+        while (x < 10) {
+          result -= 1 / x;
+          x += 1;
+        }
+        if (x == 10) { return result + PSI_10; }
+
+        // Compute asymptotic digamma
+        static const double A[] = {
+            8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
+            -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
+            8.33333333333333333333E-2,
+        };
+
+        double y = 0;
+        if (x < 1.0e17) {
+          double z = 1.0 / (x * x);
+          // y = z * polevl(z, A, 6);
+
+          double polevl_result = 0;
+    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
+    y = z * polevl_result;
+        }
+        return result + log(x) - (0.5 / x) - y;
+      };
+
+      return compute(x);
+    };
+
+    return calc_digamma(src);
+  }
 };
 
 template<>
diff --git a/oneflow/core/ep/cuda/primitive/binary_functor.cuh b/oneflow/core/ep/cuda/primitive/binary_functor.cuh
index 252eb3b418f..89b65607423 100644
--- a/oneflow/core/ep/cuda/primitive/binary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/binary_functor.cuh
@@ -245,7 +245,7 @@ struct BinaryFunctor<DeviceType::kCUDA, BinaryOp::kDigammaBackwardWithDyX, Src,
   OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
   OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
     // TODO:shijiaxing： This function is named trigamma, it will be implemented soon.
-    assert(false);
+    UNIMPLEMENTED();
     return static_cast<Dst>(0.0);
   }
 };
diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index 5dcbefac4c5..96e28827979 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -228,8 +228,57 @@ template<typename Dst, typename Src>
 struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
-  OF_DEVICE_FUNC Dst operator()(Src src) const {
-    return static_cast<Dst>(calc_digamma_cuda<Src, Src>(src));
+  OF_DEVICE_FUNC Dst operator()(Src in) const {
+    
+    static const double PI_f64 = 3.14159265358979323846;
+  const Src PSI_10 = 2.25175258906672110764;
+  const Src A[] = {
+      8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
+      -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
+      8.33333333333333333333E-2,
+  };
+
+  Src x = static_cast<Src>(in);
+  if (x == static_cast<Src>(0)) {
+    // As per C++ standard for gamma related functions and SciPy,
+    // If the argument is ±0, ±∞ is returned
+    return std::copysign(static_cast<Src>(INFINITY), -x);
+  }
+
+  bool x_is_integer = x == trunc(x);
+  Src result = static_cast<Src>(0);
+  if (x < 0) {
+    if (x_is_integer) {
+      // As per C++ standard for gamma related functions and SciPy,
+      // If the argument is a negative integer, NaN is returned
+      return static_cast<Src>(NAN);
+    }
+    // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
+    // accurate than tan(pi * x). While these operations are mathematically equivalent
+    // since both x and r are in radians and tan() has a periodicity of pi, in practice
+    // the computation of pi * x is a source of error (when |x| > 1).
+    double q, r;
+    r = modf(static_cast<double>(x), &q);
+    result = static_cast<Src>(-PI_f64 / tan(PI_f64 * r));
+    x = static_cast<Src>(1) - x;
+  }
+
+  while (x < 10) {
+    result -= static_cast<Src>(1) / x;
+    x += 1;
+  }
+  if (x == static_cast<Src>(10)) { return static_cast<Src>(result + PSI_10); }
+
+  Src y = 0;
+  if (x < 1.0e17) {
+    Src z = static_cast<Src>(1) / (x * x);
+
+    Src polevl_result = 0;
+    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
+    y = z * polevl_result;
+  }
+
+  return static_cast<Src>(log(x) - (static_cast<Src>(0.5) / x) - y + result);
   }
 };
 

From 17fe31ab46cb4a744fbe6a5e9cb2406903e550d9 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Mon, 3 Apr 2023 04:46:47 +0000
Subject: [PATCH 15/21] fmt

---
 oneflow/core/common/math_util.cpp | 1 -
 oneflow/core/common/math_util.h   | 2 --
 2 files changed, 3 deletions(-)

diff --git a/oneflow/core/common/math_util.cpp b/oneflow/core/common/math_util.cpp
index 694c8bf1e3b..a60f37b6400 100644
--- a/oneflow/core/common/math_util.cpp
+++ b/oneflow/core/common/math_util.cpp
@@ -29,5 +29,4 @@ int64_t Gcd(int64_t m, int64_t n) {
 
 int64_t Lcm(int64_t m, int64_t n) { return m * n / Gcd(m, n); }
 
-
 }  // namespace oneflow
diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h
index 4086fe56607..f19e20ff45b 100644
--- a/oneflow/core/common/math_util.h
+++ b/oneflow/core/common/math_util.h
@@ -31,8 +31,6 @@ int64_t Gcd(int64_t m, int64_t n);
 
 int64_t Lcm(int64_t m, int64_t n);
 
-
-
 template<typename T>
 OF_DEVICE_FUNC T DeviceMin(T a, T b) {
 #if defined(__CUDA_ARCH__)

From 8b80a76acda6446742fb830efe272e3834576199 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Mon, 3 Apr 2023 04:47:51 +0000
Subject: [PATCH 16/21] fmt

---
 .../core/ep/cpu/primitive/binary_functor.h    |  2 +-
 oneflow/core/ep/cpu/primitive/unary_functor.h | 10 +--
 .../core/ep/cuda/primitive/unary_functor.cuh  | 85 +++++++++----------
 3 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/oneflow/core/ep/cpu/primitive/binary_functor.h b/oneflow/core/ep/cpu/primitive/binary_functor.h
index a7d14c9deeb..e80d63569f8 100644
--- a/oneflow/core/ep/cpu/primitive/binary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/binary_functor.h
@@ -358,7 +358,7 @@ struct BinaryFunctor<DeviceType::kCPU, BinaryOp::kDigammaBackwardWithDyX, Src, D
   OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
   OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
     // TODO:shijiaxing： This function is named trigamma, it will be implemented soon.
-   UNIMPLEMENTED();
+    UNIMPLEMENTED();
     return 0;
   }
 };
diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h
index d1f6b18d1fd..a5ec7a61402 100644
--- a/oneflow/core/ep/cpu/primitive/unary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/unary_functor.h
@@ -171,9 +171,9 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, float, float> {
         float y = 0;
         if (x < 1.0e17f) {
           float z = 1 / (x * x);
-         float polevl_result = 0;
-    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
-    y = z * polevl_result;
+          float polevl_result = 0;
+          for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
+          y = z * polevl_result;
         }
         return result + logf(x) - (0.5f / x) - y;
       };
@@ -237,8 +237,8 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, double, double> {
           // y = z * polevl(z, A, 6);
 
           double polevl_result = 0;
-    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
-    y = z * polevl_result;
+          for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
+          y = z * polevl_result;
         }
         return result + log(x) - (0.5 / x) - y;
       };
diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index 96e28827979..c1169271810 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -229,56 +229,55 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC Dst operator()(Src in) const {
-    
     static const double PI_f64 = 3.14159265358979323846;
-  const Src PSI_10 = 2.25175258906672110764;
-  const Src A[] = {
-      8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
-      -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
-      8.33333333333333333333E-2,
-  };
-
-  Src x = static_cast<Src>(in);
-  if (x == static_cast<Src>(0)) {
-    // As per C++ standard for gamma related functions and SciPy,
-    // If the argument is ±0, ±∞ is returned
-    return std::copysign(static_cast<Src>(INFINITY), -x);
-  }
-
-  bool x_is_integer = x == trunc(x);
-  Src result = static_cast<Src>(0);
-  if (x < 0) {
-    if (x_is_integer) {
+    const Src PSI_10 = 2.25175258906672110764;
+    const Src A[] = {
+        8.33333333333333333333E-2,  -2.10927960927960927961E-2, 7.57575757575757575758E-3,
+        -4.16666666666666666667E-3, 3.96825396825396825397E-3,  -8.33333333333333333333E-3,
+        8.33333333333333333333E-2,
+    };
+
+    Src x = static_cast<Src>(in);
+    if (x == static_cast<Src>(0)) {
       // As per C++ standard for gamma related functions and SciPy,
-      // If the argument is a negative integer, NaN is returned
-      return static_cast<Src>(NAN);
+      // If the argument is ±0, ±∞ is returned
+      return std::copysign(static_cast<Src>(INFINITY), -x);
     }
-    // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
-    // accurate than tan(pi * x). While these operations are mathematically equivalent
-    // since both x and r are in radians and tan() has a periodicity of pi, in practice
-    // the computation of pi * x is a source of error (when |x| > 1).
-    double q, r;
-    r = modf(static_cast<double>(x), &q);
-    result = static_cast<Src>(-PI_f64 / tan(PI_f64 * r));
-    x = static_cast<Src>(1) - x;
-  }
 
-  while (x < 10) {
-    result -= static_cast<Src>(1) / x;
-    x += 1;
-  }
-  if (x == static_cast<Src>(10)) { return static_cast<Src>(result + PSI_10); }
+    bool x_is_integer = x == trunc(x);
+    Src result = static_cast<Src>(0);
+    if (x < 0) {
+      if (x_is_integer) {
+        // As per C++ standard for gamma related functions and SciPy,
+        // If the argument is a negative integer, NaN is returned
+        return static_cast<Src>(NAN);
+      }
+      // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
+      // accurate than tan(pi * x). While these operations are mathematically equivalent
+      // since both x and r are in radians and tan() has a periodicity of pi, in practice
+      // the computation of pi * x is a source of error (when |x| > 1).
+      double q, r;
+      r = modf(static_cast<double>(x), &q);
+      result = static_cast<Src>(-PI_f64 / tan(PI_f64 * r));
+      x = static_cast<Src>(1) - x;
+    }
 
-  Src y = 0;
-  if (x < 1.0e17) {
-    Src z = static_cast<Src>(1) / (x * x);
+    while (x < 10) {
+      result -= static_cast<Src>(1) / x;
+      x += 1;
+    }
+    if (x == static_cast<Src>(10)) { return static_cast<Src>(result + PSI_10); }
 
-    Src polevl_result = 0;
-    for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
-    y = z * polevl_result;
-  }
+    Src y = 0;
+    if (x < 1.0e17) {
+      Src z = static_cast<Src>(1) / (x * x);
+
+      Src polevl_result = 0;
+      for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; }
+      y = z * polevl_result;
+    }
 
-  return static_cast<Src>(log(x) - (static_cast<Src>(0.5) / x) - y + result);
+    return static_cast<Src>(log(x) - (static_cast<Src>(0.5) / x) - y + result);
   }
 };
 

From e1269531f4580143e886dc8ae3d2b8f0295e7885 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Fri, 7 Apr 2023 02:48:43 +0000
Subject: [PATCH 17/21] add references

---
 oneflow/core/ep/cpu/primitive/unary_functor.h    | 2 ++
 oneflow/core/ep/cuda/primitive/unary_functor.cuh | 1 +
 oneflow/core/functional/functional_api.yaml      | 3 +--
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h
index a5ec7a61402..34c79a33492 100644
--- a/oneflow/core/ep/cpu/primitive/unary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/unary_functor.h
@@ -126,6 +126,7 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, float, float> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC float operator()(float src) const {
+    // references  https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L434-L487
     const auto& calc_digamma = [](float x) {
       std::function<float(float)> compute;
       compute = [&](float x) {
@@ -190,6 +191,7 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, double, double> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC double operator()(double src) const {
+    // references  https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L376-L428
     const auto& calc_digamma = [](double x) {
       std::function<double(double)> compute;
       compute = [&](double x) {
diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index c1169271810..e3bb1b7c8f6 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -229,6 +229,7 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC Dst operator()(Src in) const {
+    // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/cuda/Math.cuh#L3029-L3090
     static const double PI_f64 = 3.14159265358979323846;
     const Src PSI_10 = 2.25175258906672110764;
     const Src A[] = {
diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml
index b77157a8e9d..90818166592 100644
--- a/oneflow/core/functional/functional_api.yaml
+++ b/oneflow/core/functional/functional_api.yaml
@@ -3317,5 +3317,4 @@
 
 - name: "digamma_grad"
   signature: "Tensor (Tensor x, Tensor dy) => DigammaGrad"
-  bind_python: False
-  
\ No newline at end of file
+  bind_python: False
\ No newline at end of file

From 809892db2822a4fb841f1966ab07092b97c45e85 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Fri, 7 Apr 2023 02:58:08 +0000
Subject: [PATCH 18/21] fmt

---
 oneflow/core/ep/cpu/primitive/unary_functor.h    | 6 ++++--
 oneflow/core/ep/cuda/primitive/unary_functor.cuh | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h
index 34c79a33492..e5e62ec645c 100644
--- a/oneflow/core/ep/cpu/primitive/unary_functor.h
+++ b/oneflow/core/ep/cpu/primitive/unary_functor.h
@@ -126,7 +126,8 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, float, float> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC float operator()(float src) const {
-    // references  https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L434-L487
+    // references
+    // https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L434-L487
     const auto& calc_digamma = [](float x) {
       std::function<float(float)> compute;
       compute = [&](float x) {
@@ -191,7 +192,8 @@ struct UnaryFunctor<DeviceType::kCPU, UnaryOp::kDigamma, double, double> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC double operator()(double src) const {
-    // references  https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L376-L428
+    // references
+    // https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L376-L428
     const auto& calc_digamma = [](double x) {
       std::function<double(double)> compute;
       compute = [&](double x) {
diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
index e3bb1b7c8f6..3c4ed58055a 100644
--- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh
@@ -229,7 +229,8 @@ struct UnaryFunctor<DeviceType::kCUDA, UnaryOp::kDigamma, Dst, Src> {
   OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {}
 
   OF_DEVICE_FUNC Dst operator()(Src in) const {
-    // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/cuda/Math.cuh#L3029-L3090
+    // references
+    // https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/cuda/Math.cuh#L3029-L3090
     static const double PI_f64 = 3.14159265358979323846;
     const Src PSI_10 = 2.25175258906672110764;
     const Src A[] = {

From 7130878dc4d2be9e0ed1a819f3cba3d55f6f379c Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Fri, 7 Apr 2023 09:31:33 +0000
Subject: [PATCH 19/21] fix build

---
 oneflow/core/ep/cuda/primitive/binary_functor.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oneflow/core/ep/cuda/primitive/binary_functor.cuh b/oneflow/core/ep/cuda/primitive/binary_functor.cuh
index 89b65607423..252eb3b418f 100644
--- a/oneflow/core/ep/cuda/primitive/binary_functor.cuh
+++ b/oneflow/core/ep/cuda/primitive/binary_functor.cuh
@@ -245,7 +245,7 @@ struct BinaryFunctor<DeviceType::kCUDA, BinaryOp::kDigammaBackwardWithDyX, Src,
   OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {}
   OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const {
     // TODO:shijiaxing： This function is named trigamma, it will be implemented soon.
-    UNIMPLEMENTED();
+    assert(false);
     return static_cast<Dst>(0.0);
   }
 };

From d1f63ea66cf81305d93264e4ac0735c4ea0fdb51 Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Fri, 7 Apr 2023 09:33:26 +0000
Subject: [PATCH 20/21] fmt

---
 oneflow/core/functional/functional_api.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml
index 90818166592..1352b6dedcd 100644
--- a/oneflow/core/functional/functional_api.yaml
+++ b/oneflow/core/functional/functional_api.yaml
@@ -3317,4 +3317,4 @@
 
 - name: "digamma_grad"
   signature: "Tensor (Tensor x, Tensor dy) => DigammaGrad"
-  bind_python: False
\ No newline at end of file
+  bind_python: False

From 190aabb641900cd62bb6be786094969371e6b73c Mon Sep 17 00:00:00 2001
From: youxiudeshouyeren <1929724847@qq.com>
Date: Sat, 8 Apr 2023 03:21:13 +0000
Subject: [PATCH 21/21] fix

---
 oneflow/ir/include/OneFlow/OneFlowUserOps.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/oneflow/ir/include/OneFlow/OneFlowUserOps.td b/oneflow/ir/include/OneFlow/OneFlowUserOps.td
index 5d9600b1fb8..c14b13e3258 100644
--- a/oneflow/ir/include/OneFlow/OneFlowUserOps.td
+++ b/oneflow/ir/include/OneFlow/OneFlowUserOps.td
@@ -4585,7 +4585,7 @@ def OneFlow_LgammaGradOp : OneFlow_BaseOp<"lgamma_grad", [NoMemoryEffect, Declar
 }
 
 
-def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
+def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoMemoryEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
   let input = (ins
     OneFlow_Tensor:$x
   );
@@ -4598,7 +4598,7 @@ def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterf
   let has_data_type_infer_fn = 1;
 }
 
-def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoSideEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
+def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoMemoryEffect, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
   let input = (ins
     OneFlow_Tensor:$x,
     OneFlow_Tensor:$dy