From cb8ffeaa066564a76b3fbda8d870b0d8e4d8c269 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sat, 1 Apr 2023 12:13:25 +0000 Subject: [PATCH 01/21] digamma op dev --- oneflow/core/common/math_util.cpp | 112 ++++++++++++++++++ oneflow/core/common/math_util.h | 69 +++++++++++ .../primitive/broadcast_elementwise_binary.h | 19 +-- .../ep/common/primitive/elementwise_unary.h | 1 + .../core/ep/cpu/primitive/binary_functor.h | 10 ++ oneflow/core/ep/cpu/primitive/unary_functor.h | 10 +- .../core/ep/cuda/primitive/binary_functor.cuh | 10 ++ .../core/ep/cuda/primitive/unary_functor.cuh | 29 +++++ oneflow/core/ep/include/primitive/binary_op.h | 1 + oneflow/core/ep/include/primitive/unary_op.h | 1 + oneflow/core/functional/functional_api.yaml | 11 +- .../core/functional/impl/unary_functor.cpp | 4 +- oneflow/ir/include/OneFlow/OneFlowUserOps.td | 26 ++++ ...ath_unary_elementwise_primitive_kernel.cpp | 2 + oneflow/user/ops/math_unary_elementwise_seq.h | 2 + python/oneflow/special/__init__.py | 1 + python/oneflow/special/special_ops.py | 3 + 17 files changed, 299 insertions(+), 12 deletions(-) diff --git a/oneflow/core/common/math_util.cpp b/oneflow/core/common/math_util.cpp index a60f37b6400..f5ce19d4491 100644 --- a/oneflow/core/common/math_util.cpp +++ b/oneflow/core/common/math_util.cpp @@ -29,4 +29,116 @@ int64_t Gcd(int64_t m, int64_t n) { int64_t Lcm(int64_t m, int64_t n) { return m * n / Gcd(m, n); } +template +T polevl(const T x, const T A[], size_t len) { + T result = 0; + for (size_t i = 0; i <= len; i++) { result = result * x + A[i]; } + return result; +} + +/* + * This function is derived from the implementation of the digamma function in the Cephes Math + * Library. See note [3-Clause BSD License for the Cephes Math Library]. + */ + +double calc_digamma_cpu(double x) { + static double PSI_10 = 2.25175258906672110764; + if (x == 0) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is ±0, ±∞ is returned + return std::copysign(INFINITY, -x); + } + + bool x_is_integer = x == trunc(x); + if (x < 0) { + if (x_is_integer) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is a negative integer, NaN is returned + return std::numeric_limits::quiet_NaN(); + } + // Extracts the fractional part of x as r, since tan(pi * r) is more numerically + // accurate than tan(pi * x). While these operations are mathematically equivalent + // since both x and r are in radians and tan() has a periodicity of pi, in practice + // the computation of pi * x is a source of error (when |x| > 1). + double q, r; + r = std::modf(x, &q); + return calc_digamma_cpu(1 - x) - pi / tan(pi * r); + } + + // Push x to be >= 10 + double result = 0; + while (x < 10) { + result -= 1 / x; + x += 1; + } + if (x == 10) { return result + PSI_10; } + + // Compute asymptotic digamma + static const double A[] = { + 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, + -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, + 8.33333333333333333333E-2, + }; + + double y = 0; + if (x < 1.0e17) { + double z = 1.0 / (x * x); + y = z * polevl(z, A, 6); + } + return result + log(x) - (0.5 / x) - y; +} + +/* + * This function is derived from the implementation of the digamma function in the Cephes Math + * Library. See note [3-Clause BSD License for the Cephes Math Library]. + */ + +float calc_digamma_cpu(float x) { + static float PSI_10 = 2.25175258906672110764f; + if (x == 0) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is ±0, ±∞ is returned + return std::copysign(INFINITY, -x); + } + + bool x_is_integer = x == truncf(x); + if (x < 0) { + if (x_is_integer) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is a negative integer, NaN is returned + return std::numeric_limits::quiet_NaN(); + } + // Extracts the fractional part of x as r, since tan(pi * r) is more numerically + // accurate than tan(pi * x). While these operations are mathematically equivalent + // since both x and r are in radians and tan() has a periodicity of pi, in practice + // the computation of pi * x is a source of error (when |x| > 1). + double q, r; + r = std::modf(x, &q); + float pi_over_tan_pi_x = (float)(pi / tan(pi * r)); + return calc_digamma_cpu(1 - x) - pi_over_tan_pi_x; + } + + // Push x to be >= 10 + float result = 0; + while (x < 10) { + result -= 1 / x; + x += 1; + } + if (x == 10) { return result + PSI_10; } + + // Compute asymptotic digamma + static const float A[] = { + 8.33333333333333333333E-2f, -2.10927960927960927961E-2f, 7.57575757575757575758E-3f, + -4.16666666666666666667E-3f, 3.96825396825396825397E-3f, -8.33333333333333333333E-3f, + 8.33333333333333333333E-2f, + }; + + float y = 0; + if (x < 1.0e17f) { + float z = 1 / (x * x); + y = z * polevl(z, A, 6); + } + return result + logf(x) - (0.5f / x) - y; +} + } // namespace oneflow diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h index b0361f90c73..7aba95e5cdb 100644 --- a/oneflow/core/common/math_util.h +++ b/oneflow/core/common/math_util.h @@ -25,6 +25,67 @@ int64_t Gcd(int64_t m, int64_t n); int64_t Lcm(int64_t m, int64_t n); +template + T polevl(const T x, const T A[], size_t len); + +// This function references pytorch/aten/src/ATen/native/Math.h +double calc_digamma_cpu(double x); + +float calc_digamma_cpu(float x); + +template +static OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) { + static const double PI_f64 = 3.14159265358979323846; + const accscalar_t PSI_10 = 2.25175258906672110764; + const accscalar_t A[] = { + 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, + -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, + 8.33333333333333333333E-2, + }; + + accscalar_t x = static_cast(in); + if (x == static_cast(0)) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is ±0, ±∞ is returned + return std::copysign(static_cast(INFINITY), -x); + } + + bool x_is_integer = x == trunc(x); + accscalar_t result = static_cast(0); + if (x < 0) { + if (x_is_integer) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is a negative integer, NaN is returned + return static_cast(NAN); + } + // Extracts the fractional part of x as r, since tan(pi * r) is more numerically + // accurate than tan(pi * x). While these operations are mathematically equivalent + // since both x and r are in radians and tan() has a periodicity of pi, in practice + // the computation of pi * x is a source of error (when |x| > 1). + double q, r; + r = modf(static_cast(x), &q); + result = static_cast(-PI_f64 / tan(PI_f64 * r)); + x = static_cast(1) - x; + } + + while (x < 10) { + result -= static_cast(1) / x; + x += 1; + } + if (x == static_cast(10)) { return static_cast(result + PSI_10); } + + accscalar_t y = 0; + if (x < 1.0e17) { + accscalar_t z = static_cast(1) / (x * x); + + accscalar_t polevl_result = 0; + for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } + y = z * polevl_result; + } + + return static_cast(log(x) - (static_cast(0.5) / x) - y + result); +} + template OF_DEVICE_FUNC T DeviceMin(T a, T b) { #if defined(__CUDA_ARCH__) @@ -43,6 +104,14 @@ OF_DEVICE_FUNC T DeviceMax(T a, T b) { #endif } +template +constexpr T pi = static_cast(3.141592653589793238462643383279502); + +// template +// inline constexpr T pi() { +// return static_cast(3.141592653589793238462643383279502); +// } + } // namespace oneflow #endif // ONEFLOW_CORE_COMMON_MATH_UTIL_H_ diff --git a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h index 70c1382a559..2b4ad33a9d1 100644 --- a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h +++ b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h @@ -131,15 +131,16 @@ inline bool IsDimsEquals(size_t num_src0_dims, const int64_t* src0_dims, size_t OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kAtanhBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCosBackwardWithDyX) -#define BINARY_MATH_BACKWARD_OP_SEQ_1 \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \ +#define BINARY_MATH_BACKWARD_OP_SEQ_1 \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLog2BackwardWithDyX) #define BINARY_MATH_BACKWARD_OP_SEQ_2 \ diff --git a/oneflow/core/ep/common/primitive/elementwise_unary.h b/oneflow/core/ep/common/primitive/elementwise_unary.h index d5d14ef6c80..136b587d9c8 100644 --- a/oneflow/core/ep/common/primitive/elementwise_unary.h +++ b/oneflow/core/ep/common/primitive/elementwise_unary.h @@ -54,6 +54,7 @@ namespace primitive { OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCeil) \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCos) \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kCosh) \ + OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kDigamma) \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kErf) \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kErfc) \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kExp) \ diff --git a/oneflow/core/ep/cpu/primitive/binary_functor.h b/oneflow/core/ep/cpu/primitive/binary_functor.h index 4b25663a73a..2c748c58968 100644 --- a/oneflow/core/ep/cpu/primitive/binary_functor.h +++ b/oneflow/core/ep/cpu/primitive/binary_functor.h @@ -353,6 +353,16 @@ struct BinaryFunctor } }; +template +struct BinaryFunctor { + OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {} + OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const { + // TODO:shijiaxing: This function is named trigamma, it will be implemented soon. + assert(false); + return 0; + } +}; + #define SPECIALIZATION_CPU_BINARY_FUNCTOR(op, type) \ template<> \ struct BinaryFunctor { \ diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h index b3119769644..cc0b94d33a8 100644 --- a/oneflow/core/ep/cpu/primitive/unary_functor.h +++ b/oneflow/core/ep/cpu/primitive/unary_functor.h @@ -15,7 +15,7 @@ limitations under the License. */ #include "oneflow/core/ep/common/primitive/unary_functor.h" #include "oneflow/core/ep/cpu/primitive/type_seq.h" - +#include "oneflow/core/common/math_util.h" namespace oneflow { namespace ep { namespace primitive { @@ -120,6 +120,13 @@ struct UnaryFunctor { } }; +template +struct UnaryFunctor { + OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} + + OF_DEVICE_FUNC Dst operator()(Src src) const { return static_cast(calc_digamma_cpu(src)); } +}; + template<> struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} @@ -187,6 +194,7 @@ SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kReciprocalNoNan); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu); +SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma); template<> struct UnaryFunctor { diff --git a/oneflow/core/ep/cuda/primitive/binary_functor.cuh b/oneflow/core/ep/cuda/primitive/binary_functor.cuh index d62f50e4628..252eb3b418f 100644 --- a/oneflow/core/ep/cuda/primitive/binary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/binary_functor.cuh @@ -240,6 +240,16 @@ struct BinaryFunctor { float atol, rtol; }; +template +struct BinaryFunctor { + OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {} + OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const { + // TODO:shijiaxing: This function is named trigamma, it will be implemented soon. + assert(false); + return static_cast(0.0); + } +}; + #define SPECIALIZATION_INTEGRAL_CLOSENESS_BINARY_FUNCTOR(op, type) \ template \ struct BinaryFunctor { \ diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index 91196a6c382..820a135d0e5 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -18,6 +18,7 @@ limitations under the License. #include "oneflow/core/cuda/elementwise.cuh" #include "oneflow/core/ep/cuda/cuda_stream.h" #include +#include "oneflow/core/common/math_util.h" namespace oneflow { namespace ep { @@ -223,6 +224,33 @@ struct UnaryFunctor { OF_DEVICE_FUNC double operator()(double src) const { return trunc(src); } }; + +template +struct UnaryFunctor { + OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} + + OF_DEVICE_FUNC Dst operator()(Src src) const { + return static_cast(calc_digamma_cuda(src)); + } +}; + +template<> +struct UnaryFunctor { + OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} + + OF_DEVICE_FUNC half operator()(half src) const { + return calc_digamma_cuda(src); + } +}; +// template<> +// struct UnaryFunctor { +// OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} + +// OF_DEVICE_FUNC double operator()(double src) const { +// return calc_digamma_cuda(src); +// } +// }; + template<> struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} @@ -443,6 +471,7 @@ SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNanAssign); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu); +SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma); template<> struct UnaryFunctor { diff --git a/oneflow/core/ep/include/primitive/binary_op.h b/oneflow/core/ep/include/primitive/binary_op.h index 6447d7baf3e..7f003e6c025 100644 --- a/oneflow/core/ep/include/primitive/binary_op.h +++ b/oneflow/core/ep/include/primitive/binary_op.h @@ -92,6 +92,7 @@ enum class BinaryOp { kExp2BackwardWithDyX, kExpm1BackwardWithDyX, kLgammaBackwardWithDyX, + kDigammaBackwardWithDyX, kLogBackwardWithDyX, kLog2BackwardWithDyX, kLog10BackwardWithDyX, diff --git a/oneflow/core/ep/include/primitive/unary_op.h b/oneflow/core/ep/include/primitive/unary_op.h index b20bbb28760..b487cf25cc7 100644 --- a/oneflow/core/ep/include/primitive/unary_op.h +++ b/oneflow/core/ep/include/primitive/unary_op.h @@ -54,6 +54,7 @@ enum class UnaryOp { kCeil, kCos, kCosh, + kDigamma, kErf, kErfc, kExp, diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml index 383e4c9f031..b77157a8e9d 100644 --- a/oneflow/core/functional/functional_api.yaml +++ b/oneflow/core/functional/functional_api.yaml @@ -3309,4 +3309,13 @@ - name: "frac_" signature: "Tensor (Tensor x) => FracInplace" - bind_python: True \ No newline at end of file + bind_python: True + +- name: "digamma" + signature: "Tensor (Tensor x) => Digamma" + bind_python: True + +- name: "digamma_grad" + signature: "Tensor (Tensor x, Tensor dy) => DigammaGrad" + bind_python: False + \ No newline at end of file diff --git a/oneflow/core/functional/impl/unary_functor.cpp b/oneflow/core/functional/impl/unary_functor.cpp index 6d48b088613..3d1733efce0 100644 --- a/oneflow/core/functional/impl/unary_functor.cpp +++ b/oneflow/core/functional/impl/unary_functor.cpp @@ -64,7 +64,8 @@ namespace impl { OF_PP_MAKE_TUPLE_SEQ("sqrt", Sqrt) \ OF_PP_MAKE_TUPLE_SEQ("square", Square) \ OF_PP_MAKE_TUPLE_SEQ("tan", Tan) \ - OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh) + OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh) \ + OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma) #define FLOAT_UNARY_PRIMITIVE_FUNC_BWD_WITH_DY_Y_SEQ OF_PP_MAKE_TUPLE_SEQ("sigmoid", Sigmoid) @@ -179,6 +180,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) { m.add_functor("Ceil"); ADD_UNARY_FUNCTOR_WITH_DY_X(Cos, "Cos"); ADD_UNARY_FUNCTOR_WITH_DY_X(Cosh, "Cosh"); + ADD_UNARY_FUNCTOR_WITH_DY_X(Digamma,"Digamma"); ADD_UNARY_FUNCTOR_WITH_DY_X(Erf, "Erf"); ADD_UNARY_FUNCTOR_WITH_DY_X(Erfc, "Erfc"); ADD_UNARY_FUNCTOR_WITH_DY_X(Exp, "Exp"); diff --git a/oneflow/ir/include/OneFlow/OneFlowUserOps.td b/oneflow/ir/include/OneFlow/OneFlowUserOps.td index edbe87d871f..4444fff724c 100644 --- a/oneflow/ir/include/OneFlow/OneFlowUserOps.td +++ b/oneflow/ir/include/OneFlow/OneFlowUserOps.td @@ -4584,6 +4584,32 @@ def OneFlow_LgammaGradOp : OneFlow_BaseOp<"lgamma_grad", [NoSideEffect, DeclareO let has_data_type_infer_fn = 1; } +def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterfaceMethods]> { + let input = (ins + OneFlow_Tensor:$x + ); + let output = (outs + OneFlow_Tensor:$y + ); + let has_logical_tensor_desc_infer_fn = 1; + let has_physical_tensor_desc_infer_fn = 1; + let has_get_sbp_fn = 1; + let has_data_type_infer_fn = 1; +} + +def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoSideEffect, DeclareOpInterfaceMethods]> { + let input = (ins + OneFlow_Tensor:$x + ); + let output = (outs + OneFlow_Tensor:$y + ); + let has_logical_tensor_desc_infer_fn = 1; + let has_physical_tensor_desc_infer_fn = 1; + let has_get_sbp_fn = 1; + let has_data_type_infer_fn = 1; +} + def OneFlow_LogOp : OneFlow_BaseOp<"log", [NoSideEffect, DeclareOpInterfaceMethods]> { let input = (ins OneFlow_Tensor:$x diff --git a/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp b/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp index a8a961e91db..2bfeb4c3374 100644 --- a/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp +++ b/oneflow/user/kernels/math_unary_elementwise_primitive_kernel.cpp @@ -30,6 +30,7 @@ namespace oneflow { OF_PP_MAKE_TUPLE_SEQ("ceil", ep::primitive::UnaryOp::kCeil) \ OF_PP_MAKE_TUPLE_SEQ("cos", ep::primitive::UnaryOp::kCos) \ OF_PP_MAKE_TUPLE_SEQ("cosh", ep::primitive::UnaryOp::kCosh) \ + OF_PP_MAKE_TUPLE_SEQ("digamma", ep::primitive::UnaryOp::kDigamma) \ OF_PP_MAKE_TUPLE_SEQ("erf", ep::primitive::UnaryOp::kErf) \ OF_PP_MAKE_TUPLE_SEQ("erfc", ep::primitive::UnaryOp::kErfc) \ OF_PP_MAKE_TUPLE_SEQ("exp", ep::primitive::UnaryOp::kExp) \ @@ -68,6 +69,7 @@ namespace oneflow { OF_PP_MAKE_TUPLE_SEQ("atanh_grad", ep::primitive::BinaryOp::kAtanhBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ("cos_grad", ep::primitive::BinaryOp::kCosBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ("cosh_grad", ep::primitive::BinaryOp::kCoshBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ("digamma_grad", ep::primitive::BinaryOp::kDigammaBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ("erf_grad", ep::primitive::BinaryOp::kErfBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ("erfc_grad", ep::primitive::BinaryOp::kErfcBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ("exp_grad", ep::primitive::BinaryOp::kExpBackwardWithDyX) \ diff --git a/oneflow/user/ops/math_unary_elementwise_seq.h b/oneflow/user/ops/math_unary_elementwise_seq.h index 9cb83ae23e4..43bacc19477 100644 --- a/oneflow/user/ops/math_unary_elementwise_seq.h +++ b/oneflow/user/ops/math_unary_elementwise_seq.h @@ -31,6 +31,7 @@ namespace oneflow { OF_PP_MAKE_TUPLE_SEQ("ceil", Ceil) \ OF_PP_MAKE_TUPLE_SEQ("cos", Cos) \ OF_PP_MAKE_TUPLE_SEQ("cosh", Cosh) \ + OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma) \ OF_PP_MAKE_TUPLE_SEQ("erf", Erf) \ OF_PP_MAKE_TUPLE_SEQ("erfc", Erfc) \ OF_PP_MAKE_TUPLE_SEQ("exp", Exp) \ @@ -75,6 +76,7 @@ namespace oneflow { OF_PP_MAKE_TUPLE_SEQ("expm1", Expm1) \ OF_PP_MAKE_TUPLE_SEQ("log", Log) \ OF_PP_MAKE_TUPLE_SEQ("lgamma", Lgamma) \ + OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma) \ OF_PP_MAKE_TUPLE_SEQ("log2", Log2) \ OF_PP_MAKE_TUPLE_SEQ("log10", Log10) \ OF_PP_MAKE_TUPLE_SEQ("log1p", Log1p) \ diff --git a/python/oneflow/special/__init__.py b/python/oneflow/special/__init__.py index 941cd04c1d9..dd3c369e3bc 100644 --- a/python/oneflow/special/__init__.py +++ b/python/oneflow/special/__init__.py @@ -25,3 +25,4 @@ from .special_ops import logsumexp from .special_ops import round from .special_ops import softmax +from .special_ops import digamma diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py index bcbb038a6fe..0d7017f8b65 100644 --- a/python/oneflow/special/special_ops.py +++ b/python/oneflow/special/special_ops.py @@ -59,3 +59,6 @@ def round(x: Tensor): def softmax(x: Tensor, dim: int): return oneflow._C.softmax(x, dim) + +def digamma(x:Tensor): + return oneflow._C.digamma(x) \ No newline at end of file From fec26e3a4e5c0ec8d17d92f9569326e63b8094e6 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sat, 1 Apr 2023 12:13:36 +0000 Subject: [PATCH 02/21] unittest --- python/oneflow/test/modules/test_special_ops.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py index 28e7137b798..7db6a09a60f 100644 --- a/python/oneflow/test/modules/test_special_ops.py +++ b/python/oneflow/test/modules/test_special_ops.py @@ -112,6 +112,14 @@ def test_flow_logsumexp_with_random_data(test_case): x = random_tensor(4, random(0, 5), 2).to(device) y = torch.special.logsumexp(x, dim=np.random.randint(0, 3)) return y + + # TODO:shijiaxing When the grad function be implemented, set "auto_backward=auto" + @autotest(n=5, auto_backward=False) + def test_flow_digamma_with_random_data(test_case): + device = random_device() + x_dtype = random_dtype(["arithmetic"]) + x = random_tensor().to(device).to(x_dtype) + y = torch.special.digamma(x) if __name__ == "__main__": From 8aac0c7c8834e4c1abc818cbf7ea3411b16daaa4 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 07:50:55 +0000 Subject: [PATCH 03/21] refine --- oneflow/core/common/math_util.h | 18 ++++++++--------- .../primitive/broadcast_elementwise_binary.h | 20 +++++++++---------- oneflow/core/ep/cpu/primitive/unary_functor.h | 1 + .../core/ep/cuda/primitive/unary_functor.cuh | 14 ++----------- .../core/functional/impl/unary_functor.cpp | 2 +- oneflow/ir/include/OneFlow/OneFlowUserOps.td | 5 +++-- oneflow/user/ops/math_unary_elementwise_seq.h | 2 +- python/oneflow/special/special_ops.py | 5 +++-- .../oneflow/test/modules/test_special_ops.py | 4 ++-- 9 files changed, 31 insertions(+), 40 deletions(-) diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h index 7aba95e5cdb..4d8461a15e8 100644 --- a/oneflow/core/common/math_util.h +++ b/oneflow/core/common/math_util.h @@ -21,12 +21,18 @@ limitations under the License. namespace oneflow { +/* + * math constants + */ +template +constexpr T pi = static_cast(3.141592653589793238462643383279502); + int64_t Gcd(int64_t m, int64_t n); int64_t Lcm(int64_t m, int64_t n); template - T polevl(const T x, const T A[], size_t len); +T polevl(const T x, const T A[], size_t len); // This function references pytorch/aten/src/ATen/native/Math.h double calc_digamma_cpu(double x); @@ -34,7 +40,7 @@ double calc_digamma_cpu(double x); float calc_digamma_cpu(float x); template -static OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) { +OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) { static const double PI_f64 = 3.14159265358979323846; const accscalar_t PSI_10 = 2.25175258906672110764; const accscalar_t A[] = { @@ -104,14 +110,6 @@ OF_DEVICE_FUNC T DeviceMax(T a, T b) { #endif } -template -constexpr T pi = static_cast(3.141592653589793238462643383279502); - -// template -// inline constexpr T pi() { -// return static_cast(3.141592653589793238462643383279502); -// } - } // namespace oneflow #endif // ONEFLOW_CORE_COMMON_MATH_UTIL_H_ diff --git a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h index 2b4ad33a9d1..9aad1dc068f 100644 --- a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h +++ b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h @@ -131,16 +131,16 @@ inline bool IsDimsEquals(size_t num_src0_dims, const int64_t* src0_dims, size_t OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kAtanhBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCosBackwardWithDyX) -#define BINARY_MATH_BACKWARD_OP_SEQ_1 \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \ +#define BINARY_MATH_BACKWARD_OP_SEQ_1 \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX)\ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLog2BackwardWithDyX) #define BINARY_MATH_BACKWARD_OP_SEQ_2 \ diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h index cc0b94d33a8..170d1eafa9d 100644 --- a/oneflow/core/ep/cpu/primitive/unary_functor.h +++ b/oneflow/core/ep/cpu/primitive/unary_functor.h @@ -16,6 +16,7 @@ limitations under the License. #include "oneflow/core/ep/common/primitive/unary_functor.h" #include "oneflow/core/ep/cpu/primitive/type_seq.h" #include "oneflow/core/common/math_util.h" + namespace oneflow { namespace ep { namespace primitive { diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index 820a135d0e5..3b29695632e 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -224,13 +224,12 @@ struct UnaryFunctor { OF_DEVICE_FUNC double operator()(double src) const { return trunc(src); } }; - template struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC Dst operator()(Src src) const { - return static_cast(calc_digamma_cuda(src)); + return static_cast(calc_digamma_cuda(src)); } }; @@ -238,18 +237,9 @@ template<> struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} - OF_DEVICE_FUNC half operator()(half src) const { - return calc_digamma_cuda(src); - } + OF_DEVICE_FUNC half operator()(half src) const { return calc_digamma_cuda(src); } }; -// template<> -// struct UnaryFunctor { -// OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} -// OF_DEVICE_FUNC double operator()(double src) const { -// return calc_digamma_cuda(src); -// } -// }; template<> struct UnaryFunctor { diff --git a/oneflow/core/functional/impl/unary_functor.cpp b/oneflow/core/functional/impl/unary_functor.cpp index 3d1733efce0..d0afe4ec8bf 100644 --- a/oneflow/core/functional/impl/unary_functor.cpp +++ b/oneflow/core/functional/impl/unary_functor.cpp @@ -180,7 +180,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) { m.add_functor("Ceil"); ADD_UNARY_FUNCTOR_WITH_DY_X(Cos, "Cos"); ADD_UNARY_FUNCTOR_WITH_DY_X(Cosh, "Cosh"); - ADD_UNARY_FUNCTOR_WITH_DY_X(Digamma,"Digamma"); + ADD_UNARY_FUNCTOR_WITH_DY_X(Digamma, "Digamma"); ADD_UNARY_FUNCTOR_WITH_DY_X(Erf, "Erf"); ADD_UNARY_FUNCTOR_WITH_DY_X(Erfc, "Erfc"); ADD_UNARY_FUNCTOR_WITH_DY_X(Exp, "Exp"); diff --git a/oneflow/ir/include/OneFlow/OneFlowUserOps.td b/oneflow/ir/include/OneFlow/OneFlowUserOps.td index 4444fff724c..3358035dac0 100644 --- a/oneflow/ir/include/OneFlow/OneFlowUserOps.td +++ b/oneflow/ir/include/OneFlow/OneFlowUserOps.td @@ -4599,10 +4599,11 @@ def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterf def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoSideEffect, DeclareOpInterfaceMethods]> { let input = (ins - OneFlow_Tensor:$x + OneFlow_Tensor:$x, + OneFlow_Tensor:$dy ); let output = (outs - OneFlow_Tensor:$y + OneFlow_Tensor:$dx ); let has_logical_tensor_desc_infer_fn = 1; let has_physical_tensor_desc_infer_fn = 1; diff --git a/oneflow/user/ops/math_unary_elementwise_seq.h b/oneflow/user/ops/math_unary_elementwise_seq.h index 43bacc19477..9f518203095 100644 --- a/oneflow/user/ops/math_unary_elementwise_seq.h +++ b/oneflow/user/ops/math_unary_elementwise_seq.h @@ -76,7 +76,7 @@ namespace oneflow { OF_PP_MAKE_TUPLE_SEQ("expm1", Expm1) \ OF_PP_MAKE_TUPLE_SEQ("log", Log) \ OF_PP_MAKE_TUPLE_SEQ("lgamma", Lgamma) \ - OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma) \ + OF_PP_MAKE_TUPLE_SEQ("digamma", Digamma) \ OF_PP_MAKE_TUPLE_SEQ("log2", Log2) \ OF_PP_MAKE_TUPLE_SEQ("log10", Log10) \ OF_PP_MAKE_TUPLE_SEQ("log1p", Log1p) \ diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py index 0d7017f8b65..809bd9bd858 100644 --- a/python/oneflow/special/special_ops.py +++ b/python/oneflow/special/special_ops.py @@ -60,5 +60,6 @@ def round(x: Tensor): def softmax(x: Tensor, dim: int): return oneflow._C.softmax(x, dim) -def digamma(x:Tensor): - return oneflow._C.digamma(x) \ No newline at end of file + +def digamma(x: Tensor): + return oneflow._C.digamma(x) diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py index 7db6a09a60f..fedc1f1145e 100644 --- a/python/oneflow/test/modules/test_special_ops.py +++ b/python/oneflow/test/modules/test_special_ops.py @@ -112,12 +112,12 @@ def test_flow_logsumexp_with_random_data(test_case): x = random_tensor(4, random(0, 5), 2).to(device) y = torch.special.logsumexp(x, dim=np.random.randint(0, 3)) return y - + # TODO:shijiaxing When the grad function be implemented, set "auto_backward=auto" @autotest(n=5, auto_backward=False) def test_flow_digamma_with_random_data(test_case): device = random_device() - x_dtype = random_dtype(["arithmetic"]) + x_dtype = random_dtype(["arithmetic","half"]) x = random_tensor().to(device).to(x_dtype) y = torch.special.digamma(x) From b1fe15bd829c6931770ef024f82a2491748abaeb Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 07:53:12 +0000 Subject: [PATCH 04/21] tensor.digamma api --- oneflow/api/python/framework/tensor_functions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/oneflow/api/python/framework/tensor_functions.cpp b/oneflow/api/python/framework/tensor_functions.cpp index 12a0107b6c2..4f2f0a04de0 100644 --- a/oneflow/api/python/framework/tensor_functions.cpp +++ b/oneflow/api/python/framework/tensor_functions.cpp @@ -200,6 +200,7 @@ PyNumberMethods PyTensorObject_as_number = { } UNARY_METHOD(PyTensorObject_abs, functional::Abs); +UNARY_METHOD(PyTensorObject_digamma, functional::Digamma); UNARY_METHOD(PyTensorObject_exp, functional::Exp); UNARY_METHOD(PyTensorObject_exp2, functional::Exp2); UNARY_METHOD(PyTensorObject_floor, functional::Floor); @@ -788,9 +789,7 @@ static PyObject* PyTensorObject_to_global(PyObject* self, PyObject* args, PyObje PyObject* result = NULL; if (tensor->is_global()) result = PyTensorObject_global_to_global(self, args, kwargs); - else { - result = PyTensorObject_local_to_global(self, args, kwargs); - } + else { result = PyTensorObject_local_to_global(self, args, kwargs); } if (PyErr_Occurred()) { throw py::error_already_set(); } return result; @@ -1098,6 +1097,7 @@ PyMethodDef PyTensorObject_extra_methods[] = { // macro UNARY_METHOD {"abs", PyTensorObject_abs, METH_NOARGS, NULL}, + {"digamma", PyTensorObject_digamma, METH_NOARGS, NULL}, {"exp", PyTensorObject_exp, METH_NOARGS, NULL}, {"exp2", PyTensorObject_exp2, METH_NOARGS, NULL}, {"floor", PyTensorObject_floor, METH_NOARGS, NULL}, From 0e920827a3b46244e40010dd66e9812b5d717358 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 07:54:32 +0000 Subject: [PATCH 05/21] flow.digamma api --- python/oneflow/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/oneflow/__init__.py b/python/oneflow/__init__.py index 05f05e307d1..a68ccfa342a 100644 --- a/python/oneflow/__init__.py +++ b/python/oneflow/__init__.py @@ -103,6 +103,7 @@ def use_deterministic_algorithms(mode, *, warn_only=False): from oneflow._C import baddbmm from oneflow._C import broadcast_like from oneflow._C import chunk +from oneflow._C import digamma from oneflow._C import split from oneflow._C import sign from oneflow._C import sinh From a08eada3784001ec4aeb93fe46f114a3c7f5e0dd Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 08:00:03 +0000 Subject: [PATCH 06/21] fix test --- python/oneflow/test/modules/test_special_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py index fedc1f1145e..22513ad060d 100644 --- a/python/oneflow/test/modules/test_special_ops.py +++ b/python/oneflow/test/modules/test_special_ops.py @@ -120,6 +120,7 @@ def test_flow_digamma_with_random_data(test_case): x_dtype = random_dtype(["arithmetic","half"]) x = random_tensor().to(device).to(x_dtype) y = torch.special.digamma(x) + return y if __name__ == "__main__": From b60259f3c743deca3616f979b15a74a53a1e6472 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 08:28:40 +0000 Subject: [PATCH 07/21] unittest --- oneflow/core/ep/cuda/primitive/unary_functor.cuh | 8 +------- .../oneflow/test/modules/test_global_math_ops.py | 8 ++++++++ python/oneflow/test/modules/test_math_ops.py | 8 ++++++++ python/oneflow/test/modules/test_special_ops.py | 4 ++-- .../test/tensor/test_bfloat16_activation.py | 15 +++++++++++++++ python/oneflow/test/tensor/test_tensor_part_2.py | 7 +++++++ 6 files changed, 41 insertions(+), 9 deletions(-) diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index 3b29695632e..74527e363f6 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -233,13 +233,6 @@ struct UnaryFunctor { } }; -template<> -struct UnaryFunctor { - OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} - - OF_DEVICE_FUNC half operator()(half src) const { return calc_digamma_cuda(src); } -}; - template<> struct UnaryFunctor { @@ -369,6 +362,7 @@ SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kAtanh); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCeil); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCos); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kCosh); +SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kDigamma); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kErf); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kErfc); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kExp); diff --git a/python/oneflow/test/modules/test_global_math_ops.py b/python/oneflow/test/modules/test_global_math_ops.py index 624c60a398a..c4f7778059c 100644 --- a/python/oneflow/test/modules/test_global_math_ops.py +++ b/python/oneflow/test/modules/test_global_math_ops.py @@ -171,6 +171,13 @@ def _test_atan2(test_case, placement, sbp, ndim): z = torch.atan2(x, y) return z +# TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" +@autotest(n=1, auto_backward=False) +def _test_digamma(test_case, placement, sbp, ndim): + dim_list = [random(1, 3).to(int).value() * 8 for _ in range(ndim)] + x = random_tensor(ndim, *dim_list, low=0, high=10).to_global(placement, sbp) + y = torch.digamma(x) + return y class TestMathOps(flow.unittest.TestCase): @globaltest @@ -194,6 +201,7 @@ def test_math_ops(test_case): _test_acos(test_case, placement, sbp, ndim) _test_arccosh(test_case, placement, sbp, ndim) _test_acosh(test_case, placement, sbp, ndim) + _test_digamma(test_case, placement, sbp, ndim) _test_floordiv(test_case, placement, sbp, ndim) _test_atan2(test_case, placement, sbp, ndim) diff --git a/python/oneflow/test/modules/test_math_ops.py b/python/oneflow/test/modules/test_math_ops.py index 6463d0b3b0e..2355c233b0c 100644 --- a/python/oneflow/test/modules/test_math_ops.py +++ b/python/oneflow/test/modules/test_math_ops.py @@ -608,6 +608,14 @@ def test_log10_with_random_data(test_case): x = random_tensor().to(device) return torch.log10(x) +@flow.unittest.skip_unless_1n1d() +class TestDigammaModule(flow.unittest.TestCase): + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" + @autotest(n=5, auto_backward=False) + def test_digamma_with_random_data(test_case): + device = random_device() + x = random_tensor().to(device) + return torch.digamma(x) if __name__ == "__main__": unittest.main() diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py index 22513ad060d..7f82a609830 100644 --- a/python/oneflow/test/modules/test_special_ops.py +++ b/python/oneflow/test/modules/test_special_ops.py @@ -113,11 +113,11 @@ def test_flow_logsumexp_with_random_data(test_case): y = torch.special.logsumexp(x, dim=np.random.randint(0, 3)) return y - # TODO:shijiaxing When the grad function be implemented, set "auto_backward=auto" + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" @autotest(n=5, auto_backward=False) def test_flow_digamma_with_random_data(test_case): device = random_device() - x_dtype = random_dtype(["arithmetic","half"]) + x_dtype = random_dtype(["arithmetic","half","bfloat16"]) x = random_tensor().to(device).to(x_dtype) y = torch.special.digamma(x) return y diff --git a/python/oneflow/test/tensor/test_bfloat16_activation.py b/python/oneflow/test/tensor/test_bfloat16_activation.py index 0378d5bea3d..2b888cc31e3 100644 --- a/python/oneflow/test/tensor/test_bfloat16_activation.py +++ b/python/oneflow/test/tensor/test_bfloat16_activation.py @@ -690,6 +690,21 @@ def test_logsinmoid_with_random_data(test_case): rtol=1e-4, ) ) + + def test_digamma_with_random_data(test_case): + np_array = np.random.rand(4, 4) + x = flow.tensor(np_array, dtype=flow.bfloat16, device="cpu") + fp32_x = x.float() + y = flow.digamma(x) + fp32_y = flow.digamma(fp32_x) + test_case.assertTrue( + np.allclose( + y.float().numpy(), + fp32_y.bfloat16().float().numpy(), + atol=1e-4, + rtol=1e-4, + ) + ) if __name__ == "__main__": diff --git a/python/oneflow/test/tensor/test_tensor_part_2.py b/python/oneflow/test/tensor/test_tensor_part_2.py index 546bdcb2597..7b4a4b07092 100644 --- a/python/oneflow/test/tensor/test_tensor_part_2.py +++ b/python/oneflow/test/tensor/test_tensor_part_2.py @@ -935,6 +935,13 @@ def test_construct_global_tensor_by_numpy(test_case): x, placement=placement, sbp=[flow.sbp.split(0)], requires_grad=False, ) test_case.assertTrue(y_default_dtype.dtype == flow.int32) + + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" + @autotest(n=5,auto_backward=False) + def test_digamma_tensor_with_random_data(test_case): + device = random_device() + x = random_tensor().to(device) + return x.digamma() @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") From 2e0248e233e8d799f54431ac9988ffc96b796864 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 08:32:03 +0000 Subject: [PATCH 08/21] fmt --- .../primitive/broadcast_elementwise_binary.h | 20 +++++++++---------- .../core/ep/cuda/primitive/unary_functor.cuh | 1 - .../test/modules/test_global_math_ops.py | 2 ++ python/oneflow/test/modules/test_math_ops.py | 2 ++ .../oneflow/test/modules/test_special_ops.py | 4 ++-- .../test/tensor/test_bfloat16_activation.py | 2 +- .../oneflow/test/tensor/test_tensor_part_2.py | 4 ++-- 7 files changed, 19 insertions(+), 16 deletions(-) diff --git a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h index 9aad1dc068f..2b4ad33a9d1 100644 --- a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h +++ b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h @@ -131,16 +131,16 @@ inline bool IsDimsEquals(size_t num_src0_dims, const int64_t* src0_dims, size_t OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kAtanhBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCosBackwardWithDyX) -#define BINARY_MATH_BACKWARD_OP_SEQ_1 \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX)\ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \ +#define BINARY_MATH_BACKWARD_OP_SEQ_1 \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kCoshBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kErfcBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExp2BackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kExpm1BackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLgammaBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kDigammaBackwardWithDyX) \ + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLogBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kLog2BackwardWithDyX) #define BINARY_MATH_BACKWARD_OP_SEQ_2 \ diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index 74527e363f6..5dcbefac4c5 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -233,7 +233,6 @@ struct UnaryFunctor { } }; - template<> struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} diff --git a/python/oneflow/test/modules/test_global_math_ops.py b/python/oneflow/test/modules/test_global_math_ops.py index c4f7778059c..dbd46a27948 100644 --- a/python/oneflow/test/modules/test_global_math_ops.py +++ b/python/oneflow/test/modules/test_global_math_ops.py @@ -171,6 +171,7 @@ def _test_atan2(test_case, placement, sbp, ndim): z = torch.atan2(x, y) return z + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" @autotest(n=1, auto_backward=False) def _test_digamma(test_case, placement, sbp, ndim): @@ -179,6 +180,7 @@ def _test_digamma(test_case, placement, sbp, ndim): y = torch.digamma(x) return y + class TestMathOps(flow.unittest.TestCase): @globaltest def test_math_ops(test_case): diff --git a/python/oneflow/test/modules/test_math_ops.py b/python/oneflow/test/modules/test_math_ops.py index 2355c233b0c..5c27662cc56 100644 --- a/python/oneflow/test/modules/test_math_ops.py +++ b/python/oneflow/test/modules/test_math_ops.py @@ -608,6 +608,7 @@ def test_log10_with_random_data(test_case): x = random_tensor().to(device) return torch.log10(x) + @flow.unittest.skip_unless_1n1d() class TestDigammaModule(flow.unittest.TestCase): # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" @@ -617,5 +618,6 @@ def test_digamma_with_random_data(test_case): x = random_tensor().to(device) return torch.digamma(x) + if __name__ == "__main__": unittest.main() diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py index 7f82a609830..f76b5b63997 100644 --- a/python/oneflow/test/modules/test_special_ops.py +++ b/python/oneflow/test/modules/test_special_ops.py @@ -113,11 +113,11 @@ def test_flow_logsumexp_with_random_data(test_case): y = torch.special.logsumexp(x, dim=np.random.randint(0, 3)) return y - # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" @autotest(n=5, auto_backward=False) def test_flow_digamma_with_random_data(test_case): device = random_device() - x_dtype = random_dtype(["arithmetic","half","bfloat16"]) + x_dtype = random_dtype(["arithmetic", "half", "bfloat16"]) x = random_tensor().to(device).to(x_dtype) y = torch.special.digamma(x) return y diff --git a/python/oneflow/test/tensor/test_bfloat16_activation.py b/python/oneflow/test/tensor/test_bfloat16_activation.py index 2b888cc31e3..642994e020d 100644 --- a/python/oneflow/test/tensor/test_bfloat16_activation.py +++ b/python/oneflow/test/tensor/test_bfloat16_activation.py @@ -690,7 +690,7 @@ def test_logsinmoid_with_random_data(test_case): rtol=1e-4, ) ) - + def test_digamma_with_random_data(test_case): np_array = np.random.rand(4, 4) x = flow.tensor(np_array, dtype=flow.bfloat16, device="cpu") diff --git a/python/oneflow/test/tensor/test_tensor_part_2.py b/python/oneflow/test/tensor/test_tensor_part_2.py index 7b4a4b07092..82bfa7e7276 100644 --- a/python/oneflow/test/tensor/test_tensor_part_2.py +++ b/python/oneflow/test/tensor/test_tensor_part_2.py @@ -935,9 +935,9 @@ def test_construct_global_tensor_by_numpy(test_case): x, placement=placement, sbp=[flow.sbp.split(0)], requires_grad=False, ) test_case.assertTrue(y_default_dtype.dtype == flow.int32) - + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" - @autotest(n=5,auto_backward=False) + @autotest(n=5, auto_backward=False) def test_digamma_tensor_with_random_data(test_case): device = random_device() x = random_tensor().to(device) From 98ebc8be04417ba927b1586e74aa612a51b8d9c1 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 08:36:13 +0000 Subject: [PATCH 09/21] auto fmt --- oneflow/api/python/framework/tensor_functions.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/oneflow/api/python/framework/tensor_functions.cpp b/oneflow/api/python/framework/tensor_functions.cpp index 1e493d02db8..9f064516991 100644 --- a/oneflow/api/python/framework/tensor_functions.cpp +++ b/oneflow/api/python/framework/tensor_functions.cpp @@ -791,7 +791,9 @@ static PyObject* PyTensorObject_to_global(PyObject* self, PyObject* args, PyObje PyObject* result = NULL; if (tensor->is_global()) result = PyTensorObject_global_to_global(self, args, kwargs); - else { result = PyTensorObject_local_to_global(self, args, kwargs); } + else { + result = PyTensorObject_local_to_global(self, args, kwargs); + } if (PyErr_Occurred()) { throw py::error_already_set(); } return result; From d8e5b0f78ff8d08ff0e8303e455015df758bae86 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 09:12:55 +0000 Subject: [PATCH 10/21] add api psi --- python/oneflow/special/__init__.py | 1 + python/oneflow/special/special_ops.py | 3 +++ python/oneflow/test/modules/test_special_ops.py | 11 ++++++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/python/oneflow/special/__init__.py b/python/oneflow/special/__init__.py index dd3c369e3bc..940897460b4 100644 --- a/python/oneflow/special/__init__.py +++ b/python/oneflow/special/__init__.py @@ -26,3 +26,4 @@ from .special_ops import round from .special_ops import softmax from .special_ops import digamma +from .special_ops import psi diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py index 809bd9bd858..4870e24c591 100644 --- a/python/oneflow/special/special_ops.py +++ b/python/oneflow/special/special_ops.py @@ -63,3 +63,6 @@ def softmax(x: Tensor, dim: int): def digamma(x: Tensor): return oneflow._C.digamma(x) + +def psi(x: Tensor): + return oneflow._C.digamma(x) \ No newline at end of file diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py index f76b5b63997..210c9be73e6 100644 --- a/python/oneflow/test/modules/test_special_ops.py +++ b/python/oneflow/test/modules/test_special_ops.py @@ -117,10 +117,19 @@ def test_flow_logsumexp_with_random_data(test_case): @autotest(n=5, auto_backward=False) def test_flow_digamma_with_random_data(test_case): device = random_device() - x_dtype = random_dtype(["arithmetic", "half", "bfloat16"]) + x_dtype = random_dtype(["arithmetic", "half"]) x = random_tensor().to(device).to(x_dtype) y = torch.special.digamma(x) return y + + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" + @autotest(n=5, auto_backward=False) + def test_flow_psi_with_random_data(test_case): + device = random_device() + x_dtype = random_dtype(["arithmetic", "half"]) + x = random_tensor().to(device).to(x_dtype) + y = torch.special.psi(x) + return y if __name__ == "__main__": From 98cf0a7f72d7fcb3d16957fd00ba0e6734eb7a6b Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 09:13:08 +0000 Subject: [PATCH 11/21] docstr --- docs/source/special.rst | 1 + docs/source/tensor.rst | 1 + python/oneflow/framework/docstr/math_ops.py | 21 +++++++++++++++++++ .../oneflow/framework/docstr/special_ops.py | 14 +++++++++++++ python/oneflow/framework/docstr/tensor.py | 7 +++++++ 5 files changed, 44 insertions(+) diff --git a/docs/source/special.rst b/docs/source/special.rst index 810a818ac9b..459a60ce8e7 100644 --- a/docs/source/special.rst +++ b/docs/source/special.rst @@ -8,6 +8,7 @@ The oneflow.special module, modeled after SciPy's special module. :toctree: generated :nosignatures: + digamma erf erfc erfinv diff --git a/docs/source/tensor.rst b/docs/source/tensor.rst index 9684082b444..7fa220e8d03 100644 --- a/docs/source/tensor.rst +++ b/docs/source/tensor.rst @@ -225,6 +225,7 @@ Tensor class reference Tensor.div_ Tensor.double Tensor.dtype + Tensor.digamma Tensor.element_size Tensor.eq Tensor.equal diff --git a/python/oneflow/framework/docstr/math_ops.py b/python/oneflow/framework/docstr/math_ops.py index d7a39e2f1b3..9717c01a773 100644 --- a/python/oneflow/framework/docstr/math_ops.py +++ b/python/oneflow/framework/docstr/math_ops.py @@ -1972,3 +1972,24 @@ tensor([3., 0., -0., -0.], dtype=oneflow.float32) """, ) + +add_docstr( + oneflow.digamma, + r"""digamma(input) -> Tensor + + .. math:: + \digamma(x) = \frac{d}{dx} \ln\left(\Gamma\left(x\right)\right) = \frac{\Gamma'(x)}{\Gamma(x)} + + Args: + input (Tensor): the tensor to compute the digamma function on + + .. note:: This function is similar to SciPy's `scipy.special.digamma`. + + Example:: + + >>> import oneflow as flow + >>> a = flow.tensor([1, 0.5, 0, -2.1]) + >>> flow.digamma(a) + tensor([-5.7722e-01, -1.9635e+00, -inf, 1.0630e+01], dtype=oneflow.float32) + """, +) diff --git a/python/oneflow/framework/docstr/special_ops.py b/python/oneflow/framework/docstr/special_ops.py index 7807312ab0c..9f9336458d3 100644 --- a/python/oneflow/framework/docstr/special_ops.py +++ b/python/oneflow/framework/docstr/special_ops.py @@ -16,6 +16,13 @@ import oneflow from oneflow.framework.docstr.utils import add_docstr +add_docstr( + oneflow.special.digamma, + """ + Alias for :func:`oneflow.digamma`. + """, +) + add_docstr( oneflow.special.erf, """ @@ -85,3 +92,10 @@ Alias for :func:`oneflow.softmax`. """, ) + +add_docstr( + oneflow.special.psi, + """ + Alias for :func:`oneflow.special.digamma`. + """, +) \ No newline at end of file diff --git a/python/oneflow/framework/docstr/tensor.py b/python/oneflow/framework/docstr/tensor.py index 34c657d99a7..c4f58577f48 100644 --- a/python/oneflow/framework/docstr/tensor.py +++ b/python/oneflow/framework/docstr/tensor.py @@ -2770,3 +2770,10 @@ In-place version of :func:`oneflow.Tensor.frac`. """, ) + +add_docstr( + oneflow.Tensor.digamma, + """ + See :func:`oneflow.digamma` + """, +) \ No newline at end of file From b0702e2011c337514f6b03dc11aaee05aa8dbcc2 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 10:17:08 +0000 Subject: [PATCH 12/21] fmt --- python/oneflow/framework/docstr/special_ops.py | 2 +- python/oneflow/framework/docstr/tensor.py | 2 +- python/oneflow/special/special_ops.py | 3 ++- python/oneflow/test/modules/test_special_ops.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/oneflow/framework/docstr/special_ops.py b/python/oneflow/framework/docstr/special_ops.py index 9f9336458d3..1cbdba38e20 100644 --- a/python/oneflow/framework/docstr/special_ops.py +++ b/python/oneflow/framework/docstr/special_ops.py @@ -98,4 +98,4 @@ """ Alias for :func:`oneflow.special.digamma`. """, -) \ No newline at end of file +) diff --git a/python/oneflow/framework/docstr/tensor.py b/python/oneflow/framework/docstr/tensor.py index c4f58577f48..82d001aec74 100644 --- a/python/oneflow/framework/docstr/tensor.py +++ b/python/oneflow/framework/docstr/tensor.py @@ -2776,4 +2776,4 @@ """ See :func:`oneflow.digamma` """, -) \ No newline at end of file +) diff --git a/python/oneflow/special/special_ops.py b/python/oneflow/special/special_ops.py index 4870e24c591..4224324ec1c 100644 --- a/python/oneflow/special/special_ops.py +++ b/python/oneflow/special/special_ops.py @@ -64,5 +64,6 @@ def softmax(x: Tensor, dim: int): def digamma(x: Tensor): return oneflow._C.digamma(x) + def psi(x: Tensor): - return oneflow._C.digamma(x) \ No newline at end of file + return oneflow._C.digamma(x) diff --git a/python/oneflow/test/modules/test_special_ops.py b/python/oneflow/test/modules/test_special_ops.py index 210c9be73e6..84b5fd50c1f 100644 --- a/python/oneflow/test/modules/test_special_ops.py +++ b/python/oneflow/test/modules/test_special_ops.py @@ -121,7 +121,7 @@ def test_flow_digamma_with_random_data(test_case): x = random_tensor().to(device).to(x_dtype) y = torch.special.digamma(x) return y - + # TODO:shijiaxing When the grad function be implemented, rm "auto_backward=False" @autotest(n=5, auto_backward=False) def test_flow_psi_with_random_data(test_case): From c1759ec0dba46d0754a1d511eb2b5a9628a9daa7 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sun, 2 Apr 2023 11:46:26 +0000 Subject: [PATCH 13/21] fix docstr --- python/oneflow/framework/docstr/math_ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/oneflow/framework/docstr/math_ops.py b/python/oneflow/framework/docstr/math_ops.py index 9717c01a773..7160f21dc99 100644 --- a/python/oneflow/framework/docstr/math_ops.py +++ b/python/oneflow/framework/docstr/math_ops.py @@ -1988,8 +1988,9 @@ Example:: >>> import oneflow as flow - >>> a = flow.tensor([1, 0.5, 0, -2.1]) + >>> a = flow.tensor([1, 0.5]) >>> flow.digamma(a) - tensor([-5.7722e-01, -1.9635e+00, -inf, 1.0630e+01], dtype=oneflow.float32) + tensor([-0.5772, -1.9635], dtype=oneflow.float32) + """, ) From 4b41d24b872edc4c9bd5b6096c18d860705cc300 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Mon, 3 Apr 2023 04:45:36 +0000 Subject: [PATCH 14/21] refine --- oneflow/core/common/math_util.cpp | 111 --------------- oneflow/core/common/math_util.h | 59 -------- .../core/ep/cpu/primitive/binary_functor.h | 2 +- oneflow/core/ep/cpu/primitive/unary_functor.h | 128 +++++++++++++++++- .../core/ep/cuda/primitive/binary_functor.cuh | 2 +- .../core/ep/cuda/primitive/unary_functor.cuh | 53 +++++++- 6 files changed, 178 insertions(+), 177 deletions(-) diff --git a/oneflow/core/common/math_util.cpp b/oneflow/core/common/math_util.cpp index f5ce19d4491..694c8bf1e3b 100644 --- a/oneflow/core/common/math_util.cpp +++ b/oneflow/core/common/math_util.cpp @@ -29,116 +29,5 @@ int64_t Gcd(int64_t m, int64_t n) { int64_t Lcm(int64_t m, int64_t n) { return m * n / Gcd(m, n); } -template -T polevl(const T x, const T A[], size_t len) { - T result = 0; - for (size_t i = 0; i <= len; i++) { result = result * x + A[i]; } - return result; -} - -/* - * This function is derived from the implementation of the digamma function in the Cephes Math - * Library. See note [3-Clause BSD License for the Cephes Math Library]. - */ - -double calc_digamma_cpu(double x) { - static double PSI_10 = 2.25175258906672110764; - if (x == 0) { - // As per C++ standard for gamma related functions and SciPy, - // If the argument is ±0, ±∞ is returned - return std::copysign(INFINITY, -x); - } - - bool x_is_integer = x == trunc(x); - if (x < 0) { - if (x_is_integer) { - // As per C++ standard for gamma related functions and SciPy, - // If the argument is a negative integer, NaN is returned - return std::numeric_limits::quiet_NaN(); - } - // Extracts the fractional part of x as r, since tan(pi * r) is more numerically - // accurate than tan(pi * x). While these operations are mathematically equivalent - // since both x and r are in radians and tan() has a periodicity of pi, in practice - // the computation of pi * x is a source of error (when |x| > 1). - double q, r; - r = std::modf(x, &q); - return calc_digamma_cpu(1 - x) - pi / tan(pi * r); - } - - // Push x to be >= 10 - double result = 0; - while (x < 10) { - result -= 1 / x; - x += 1; - } - if (x == 10) { return result + PSI_10; } - - // Compute asymptotic digamma - static const double A[] = { - 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, - -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, - 8.33333333333333333333E-2, - }; - - double y = 0; - if (x < 1.0e17) { - double z = 1.0 / (x * x); - y = z * polevl(z, A, 6); - } - return result + log(x) - (0.5 / x) - y; -} - -/* - * This function is derived from the implementation of the digamma function in the Cephes Math - * Library. See note [3-Clause BSD License for the Cephes Math Library]. - */ - -float calc_digamma_cpu(float x) { - static float PSI_10 = 2.25175258906672110764f; - if (x == 0) { - // As per C++ standard for gamma related functions and SciPy, - // If the argument is ±0, ±∞ is returned - return std::copysign(INFINITY, -x); - } - - bool x_is_integer = x == truncf(x); - if (x < 0) { - if (x_is_integer) { - // As per C++ standard for gamma related functions and SciPy, - // If the argument is a negative integer, NaN is returned - return std::numeric_limits::quiet_NaN(); - } - // Extracts the fractional part of x as r, since tan(pi * r) is more numerically - // accurate than tan(pi * x). While these operations are mathematically equivalent - // since both x and r are in radians and tan() has a periodicity of pi, in practice - // the computation of pi * x is a source of error (when |x| > 1). - double q, r; - r = std::modf(x, &q); - float pi_over_tan_pi_x = (float)(pi / tan(pi * r)); - return calc_digamma_cpu(1 - x) - pi_over_tan_pi_x; - } - - // Push x to be >= 10 - float result = 0; - while (x < 10) { - result -= 1 / x; - x += 1; - } - if (x == 10) { return result + PSI_10; } - - // Compute asymptotic digamma - static const float A[] = { - 8.33333333333333333333E-2f, -2.10927960927960927961E-2f, 7.57575757575757575758E-3f, - -4.16666666666666666667E-3f, 3.96825396825396825397E-3f, -8.33333333333333333333E-3f, - 8.33333333333333333333E-2f, - }; - - float y = 0; - if (x < 1.0e17f) { - float z = 1 / (x * x); - y = z * polevl(z, A, 6); - } - return result + logf(x) - (0.5f / x) - y; -} } // namespace oneflow diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h index 4d8461a15e8..4086fe56607 100644 --- a/oneflow/core/common/math_util.h +++ b/oneflow/core/common/math_util.h @@ -31,66 +31,7 @@ int64_t Gcd(int64_t m, int64_t n); int64_t Lcm(int64_t m, int64_t n); -template -T polevl(const T x, const T A[], size_t len); - -// This function references pytorch/aten/src/ATen/native/Math.h -double calc_digamma_cpu(double x); - -float calc_digamma_cpu(float x); - -template -OF_DEVICE_FUNC scalar_t calc_digamma_cuda(scalar_t in) { - static const double PI_f64 = 3.14159265358979323846; - const accscalar_t PSI_10 = 2.25175258906672110764; - const accscalar_t A[] = { - 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, - -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, - 8.33333333333333333333E-2, - }; - - accscalar_t x = static_cast(in); - if (x == static_cast(0)) { - // As per C++ standard for gamma related functions and SciPy, - // If the argument is ±0, ±∞ is returned - return std::copysign(static_cast(INFINITY), -x); - } - bool x_is_integer = x == trunc(x); - accscalar_t result = static_cast(0); - if (x < 0) { - if (x_is_integer) { - // As per C++ standard for gamma related functions and SciPy, - // If the argument is a negative integer, NaN is returned - return static_cast(NAN); - } - // Extracts the fractional part of x as r, since tan(pi * r) is more numerically - // accurate than tan(pi * x). While these operations are mathematically equivalent - // since both x and r are in radians and tan() has a periodicity of pi, in practice - // the computation of pi * x is a source of error (when |x| > 1). - double q, r; - r = modf(static_cast(x), &q); - result = static_cast(-PI_f64 / tan(PI_f64 * r)); - x = static_cast(1) - x; - } - - while (x < 10) { - result -= static_cast(1) / x; - x += 1; - } - if (x == static_cast(10)) { return static_cast(result + PSI_10); } - - accscalar_t y = 0; - if (x < 1.0e17) { - accscalar_t z = static_cast(1) / (x * x); - - accscalar_t polevl_result = 0; - for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } - y = z * polevl_result; - } - - return static_cast(log(x) - (static_cast(0.5) / x) - y + result); -} template OF_DEVICE_FUNC T DeviceMin(T a, T b) { diff --git a/oneflow/core/ep/cpu/primitive/binary_functor.h b/oneflow/core/ep/cpu/primitive/binary_functor.h index 2c748c58968..a7d14c9deeb 100644 --- a/oneflow/core/ep/cpu/primitive/binary_functor.h +++ b/oneflow/core/ep/cpu/primitive/binary_functor.h @@ -358,7 +358,7 @@ struct BinaryFunctor { } }; -template -struct UnaryFunctor { +template<> +struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} - OF_DEVICE_FUNC Dst operator()(Src src) const { return static_cast(calc_digamma_cpu(src)); } + OF_DEVICE_FUNC float operator()(float src) const { + const auto& calc_digamma = [](float x) { + std::function compute; + compute = [&](float x) { + static float PSI_10 = 2.25175258906672110764f; + if (x == 0) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is ±0, ±∞ is returned + return std::copysign(INFINITY, -x); + } + + bool x_is_integer = x == truncf(x); + if (x < 0) { + if (x_is_integer) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is a negative integer, NaN is returned + return std::numeric_limits::quiet_NaN(); + } + // Extracts the fractional part of x as r, since tan(pi * r) is more numerically + // accurate than tan(pi * x). While these operations are mathematically equivalent + // since both x and r are in radians and tan() has a periodicity of pi, in practice + // the computation of pi * x is a source of error (when |x| > 1). + double q, r; + r = std::modf(x, &q); + float pi_over_tan_pi_x = (float)(pi / tan(pi * r)); + return compute(1 - x) - pi_over_tan_pi_x; + } + + // Push x to be >= 10 + float result = 0; + while (x < 10) { + result -= 1 / x; + x += 1; + } + if (x == 10) { return result + PSI_10; } + + // Compute asymptotic digamma + static const float A[] = { + 8.33333333333333333333E-2f, -2.10927960927960927961E-2f, 7.57575757575757575758E-3f, + -4.16666666666666666667E-3f, 3.96825396825396825397E-3f, -8.33333333333333333333E-3f, + 8.33333333333333333333E-2f, + }; + + float y = 0; + if (x < 1.0e17f) { + float z = 1 / (x * x); + float polevl_result = 0; + for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } + y = z * polevl_result; + } + return result + logf(x) - (0.5f / x) - y; + }; + + return compute(x); + }; + + return calc_digamma(src); + } +}; + +template<> +struct UnaryFunctor { + OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} + + OF_DEVICE_FUNC double operator()(double src) const { + const auto& calc_digamma = [](double x) { + std::function compute; + compute = [&](double x) { + static double PSI_10 = 2.25175258906672110764; + if (x == 0) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is ±0, ±∞ is returned + return std::copysign(INFINITY, -x); + } + + bool x_is_integer = x == trunc(x); + if (x < 0) { + if (x_is_integer) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is a negative integer, NaN is returned + return std::numeric_limits::quiet_NaN(); + } + // Extracts the fractional part of x as r, since tan(pi * r) is more numerically + // accurate than tan(pi * x). While these operations are mathematically equivalent + // since both x and r are in radians and tan() has a periodicity of pi, in practice + // the computation of pi * x is a source of error (when |x| > 1). + double q, r; + r = std::modf(x, &q); + return compute(1 - x) - pi / tan(pi * r); + } + + // Push x to be >= 10 + double result = 0; + while (x < 10) { + result -= 1 / x; + x += 1; + } + if (x == 10) { return result + PSI_10; } + + // Compute asymptotic digamma + static const double A[] = { + 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, + -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, + 8.33333333333333333333E-2, + }; + + double y = 0; + if (x < 1.0e17) { + double z = 1.0 / (x * x); + // y = z * polevl(z, A, 6); + + double polevl_result = 0; + for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } + y = z * polevl_result; + } + return result + log(x) - (0.5 / x) - y; + }; + + return compute(x); + }; + + return calc_digamma(src); + } }; template<> diff --git a/oneflow/core/ep/cuda/primitive/binary_functor.cuh b/oneflow/core/ep/cuda/primitive/binary_functor.cuh index 252eb3b418f..89b65607423 100644 --- a/oneflow/core/ep/cuda/primitive/binary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/binary_functor.cuh @@ -245,7 +245,7 @@ struct BinaryFunctor(0.0); } }; diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index 5dcbefac4c5..96e28827979 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -228,8 +228,57 @@ template struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} - OF_DEVICE_FUNC Dst operator()(Src src) const { - return static_cast(calc_digamma_cuda(src)); + OF_DEVICE_FUNC Dst operator()(Src in) const { + + static const double PI_f64 = 3.14159265358979323846; + const Src PSI_10 = 2.25175258906672110764; + const Src A[] = { + 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, + -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, + 8.33333333333333333333E-2, + }; + + Src x = static_cast(in); + if (x == static_cast(0)) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is ±0, ±∞ is returned + return std::copysign(static_cast(INFINITY), -x); + } + + bool x_is_integer = x == trunc(x); + Src result = static_cast(0); + if (x < 0) { + if (x_is_integer) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is a negative integer, NaN is returned + return static_cast(NAN); + } + // Extracts the fractional part of x as r, since tan(pi * r) is more numerically + // accurate than tan(pi * x). While these operations are mathematically equivalent + // since both x and r are in radians and tan() has a periodicity of pi, in practice + // the computation of pi * x is a source of error (when |x| > 1). + double q, r; + r = modf(static_cast(x), &q); + result = static_cast(-PI_f64 / tan(PI_f64 * r)); + x = static_cast(1) - x; + } + + while (x < 10) { + result -= static_cast(1) / x; + x += 1; + } + if (x == static_cast(10)) { return static_cast(result + PSI_10); } + + Src y = 0; + if (x < 1.0e17) { + Src z = static_cast(1) / (x * x); + + Src polevl_result = 0; + for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } + y = z * polevl_result; + } + + return static_cast(log(x) - (static_cast(0.5) / x) - y + result); } }; From 17fe31ab46cb4a744fbe6a5e9cb2406903e550d9 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Mon, 3 Apr 2023 04:46:47 +0000 Subject: [PATCH 15/21] fmt --- oneflow/core/common/math_util.cpp | 1 - oneflow/core/common/math_util.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/oneflow/core/common/math_util.cpp b/oneflow/core/common/math_util.cpp index 694c8bf1e3b..a60f37b6400 100644 --- a/oneflow/core/common/math_util.cpp +++ b/oneflow/core/common/math_util.cpp @@ -29,5 +29,4 @@ int64_t Gcd(int64_t m, int64_t n) { int64_t Lcm(int64_t m, int64_t n) { return m * n / Gcd(m, n); } - } // namespace oneflow diff --git a/oneflow/core/common/math_util.h b/oneflow/core/common/math_util.h index 4086fe56607..f19e20ff45b 100644 --- a/oneflow/core/common/math_util.h +++ b/oneflow/core/common/math_util.h @@ -31,8 +31,6 @@ int64_t Gcd(int64_t m, int64_t n); int64_t Lcm(int64_t m, int64_t n); - - template OF_DEVICE_FUNC T DeviceMin(T a, T b) { #if defined(__CUDA_ARCH__) From 8b80a76acda6446742fb830efe272e3834576199 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Mon, 3 Apr 2023 04:47:51 +0000 Subject: [PATCH 16/21] fmt --- .../core/ep/cpu/primitive/binary_functor.h | 2 +- oneflow/core/ep/cpu/primitive/unary_functor.h | 10 +-- .../core/ep/cuda/primitive/unary_functor.cuh | 85 +++++++++---------- 3 files changed, 48 insertions(+), 49 deletions(-) diff --git a/oneflow/core/ep/cpu/primitive/binary_functor.h b/oneflow/core/ep/cpu/primitive/binary_functor.h index a7d14c9deeb..e80d63569f8 100644 --- a/oneflow/core/ep/cpu/primitive/binary_functor.h +++ b/oneflow/core/ep/cpu/primitive/binary_functor.h @@ -358,7 +358,7 @@ struct BinaryFunctor { float y = 0; if (x < 1.0e17f) { float z = 1 / (x * x); - float polevl_result = 0; - for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } - y = z * polevl_result; + float polevl_result = 0; + for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } + y = z * polevl_result; } return result + logf(x) - (0.5f / x) - y; }; @@ -237,8 +237,8 @@ struct UnaryFunctor { // y = z * polevl(z, A, 6); double polevl_result = 0; - for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } - y = z * polevl_result; + for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } + y = z * polevl_result; } return result + log(x) - (0.5 / x) - y; }; diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index 96e28827979..c1169271810 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -229,56 +229,55 @@ struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC Dst operator()(Src in) const { - static const double PI_f64 = 3.14159265358979323846; - const Src PSI_10 = 2.25175258906672110764; - const Src A[] = { - 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, - -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, - 8.33333333333333333333E-2, - }; - - Src x = static_cast(in); - if (x == static_cast(0)) { - // As per C++ standard for gamma related functions and SciPy, - // If the argument is ±0, ±∞ is returned - return std::copysign(static_cast(INFINITY), -x); - } - - bool x_is_integer = x == trunc(x); - Src result = static_cast(0); - if (x < 0) { - if (x_is_integer) { + const Src PSI_10 = 2.25175258906672110764; + const Src A[] = { + 8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, + -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, + 8.33333333333333333333E-2, + }; + + Src x = static_cast(in); + if (x == static_cast(0)) { // As per C++ standard for gamma related functions and SciPy, - // If the argument is a negative integer, NaN is returned - return static_cast(NAN); + // If the argument is ±0, ±∞ is returned + return std::copysign(static_cast(INFINITY), -x); } - // Extracts the fractional part of x as r, since tan(pi * r) is more numerically - // accurate than tan(pi * x). While these operations are mathematically equivalent - // since both x and r are in radians and tan() has a periodicity of pi, in practice - // the computation of pi * x is a source of error (when |x| > 1). - double q, r; - r = modf(static_cast(x), &q); - result = static_cast(-PI_f64 / tan(PI_f64 * r)); - x = static_cast(1) - x; - } - while (x < 10) { - result -= static_cast(1) / x; - x += 1; - } - if (x == static_cast(10)) { return static_cast(result + PSI_10); } + bool x_is_integer = x == trunc(x); + Src result = static_cast(0); + if (x < 0) { + if (x_is_integer) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is a negative integer, NaN is returned + return static_cast(NAN); + } + // Extracts the fractional part of x as r, since tan(pi * r) is more numerically + // accurate than tan(pi * x). While these operations are mathematically equivalent + // since both x and r are in radians and tan() has a periodicity of pi, in practice + // the computation of pi * x is a source of error (when |x| > 1). + double q, r; + r = modf(static_cast(x), &q); + result = static_cast(-PI_f64 / tan(PI_f64 * r)); + x = static_cast(1) - x; + } - Src y = 0; - if (x < 1.0e17) { - Src z = static_cast(1) / (x * x); + while (x < 10) { + result -= static_cast(1) / x; + x += 1; + } + if (x == static_cast(10)) { return static_cast(result + PSI_10); } - Src polevl_result = 0; - for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } - y = z * polevl_result; - } + Src y = 0; + if (x < 1.0e17) { + Src z = static_cast(1) / (x * x); + + Src polevl_result = 0; + for (int i = 0; i <= 6; i++) { polevl_result = polevl_result * z + A[i]; } + y = z * polevl_result; + } - return static_cast(log(x) - (static_cast(0.5) / x) - y + result); + return static_cast(log(x) - (static_cast(0.5) / x) - y + result); } }; From e1269531f4580143e886dc8ae3d2b8f0295e7885 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Fri, 7 Apr 2023 02:48:43 +0000 Subject: [PATCH 17/21] add references --- oneflow/core/ep/cpu/primitive/unary_functor.h | 2 ++ oneflow/core/ep/cuda/primitive/unary_functor.cuh | 1 + oneflow/core/functional/functional_api.yaml | 3 +-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h index a5ec7a61402..34c79a33492 100644 --- a/oneflow/core/ep/cpu/primitive/unary_functor.h +++ b/oneflow/core/ep/cpu/primitive/unary_functor.h @@ -126,6 +126,7 @@ struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC float operator()(float src) const { + // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L434-L487 const auto& calc_digamma = [](float x) { std::function compute; compute = [&](float x) { @@ -190,6 +191,7 @@ struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC double operator()(double src) const { + // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L376-L428 const auto& calc_digamma = [](double x) { std::function compute; compute = [&](double x) { diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index c1169271810..e3bb1b7c8f6 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -229,6 +229,7 @@ struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC Dst operator()(Src in) const { + // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/cuda/Math.cuh#L3029-L3090 static const double PI_f64 = 3.14159265358979323846; const Src PSI_10 = 2.25175258906672110764; const Src A[] = { diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml index b77157a8e9d..90818166592 100644 --- a/oneflow/core/functional/functional_api.yaml +++ b/oneflow/core/functional/functional_api.yaml @@ -3317,5 +3317,4 @@ - name: "digamma_grad" signature: "Tensor (Tensor x, Tensor dy) => DigammaGrad" - bind_python: False - \ No newline at end of file + bind_python: False \ No newline at end of file From 809892db2822a4fb841f1966ab07092b97c45e85 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Fri, 7 Apr 2023 02:58:08 +0000 Subject: [PATCH 18/21] fmt --- oneflow/core/ep/cpu/primitive/unary_functor.h | 6 ++++-- oneflow/core/ep/cuda/primitive/unary_functor.cuh | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h index 34c79a33492..e5e62ec645c 100644 --- a/oneflow/core/ep/cpu/primitive/unary_functor.h +++ b/oneflow/core/ep/cpu/primitive/unary_functor.h @@ -126,7 +126,8 @@ struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC float operator()(float src) const { - // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L434-L487 + // references + // https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L434-L487 const auto& calc_digamma = [](float x) { std::function compute; compute = [&](float x) { @@ -191,7 +192,8 @@ struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC double operator()(double src) const { - // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L376-L428 + // references + // https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/Math.h#L376-L428 const auto& calc_digamma = [](double x) { std::function compute; compute = [&](double x) { diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index e3bb1b7c8f6..3c4ed58055a 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -229,7 +229,8 @@ struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} OF_DEVICE_FUNC Dst operator()(Src in) const { - // references https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/cuda/Math.cuh#L3029-L3090 + // references + // https://github.com/pytorch/pytorch/blob/release/1.13/aten/src/ATen/native/cuda/Math.cuh#L3029-L3090 static const double PI_f64 = 3.14159265358979323846; const Src PSI_10 = 2.25175258906672110764; const Src A[] = { From 7130878dc4d2be9e0ed1a819f3cba3d55f6f379c Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Fri, 7 Apr 2023 09:31:33 +0000 Subject: [PATCH 19/21] fix build --- oneflow/core/ep/cuda/primitive/binary_functor.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oneflow/core/ep/cuda/primitive/binary_functor.cuh b/oneflow/core/ep/cuda/primitive/binary_functor.cuh index 89b65607423..252eb3b418f 100644 --- a/oneflow/core/ep/cuda/primitive/binary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/binary_functor.cuh @@ -245,7 +245,7 @@ struct BinaryFunctor(0.0); } }; From d1f63ea66cf81305d93264e4ac0735c4ea0fdb51 Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Fri, 7 Apr 2023 09:33:26 +0000 Subject: [PATCH 20/21] fmt --- oneflow/core/functional/functional_api.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml index 90818166592..1352b6dedcd 100644 --- a/oneflow/core/functional/functional_api.yaml +++ b/oneflow/core/functional/functional_api.yaml @@ -3317,4 +3317,4 @@ - name: "digamma_grad" signature: "Tensor (Tensor x, Tensor dy) => DigammaGrad" - bind_python: False \ No newline at end of file + bind_python: False From 190aabb641900cd62bb6be786094969371e6b73c Mon Sep 17 00:00:00 2001 From: youxiudeshouyeren <1929724847@qq.com> Date: Sat, 8 Apr 2023 03:21:13 +0000 Subject: [PATCH 21/21] fix --- oneflow/ir/include/OneFlow/OneFlowUserOps.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oneflow/ir/include/OneFlow/OneFlowUserOps.td b/oneflow/ir/include/OneFlow/OneFlowUserOps.td index 5d9600b1fb8..c14b13e3258 100644 --- a/oneflow/ir/include/OneFlow/OneFlowUserOps.td +++ b/oneflow/ir/include/OneFlow/OneFlowUserOps.td @@ -4585,7 +4585,7 @@ def OneFlow_LgammaGradOp : OneFlow_BaseOp<"lgamma_grad", [NoMemoryEffect, Declar } -def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterfaceMethods]> { +def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoMemoryEffect, DeclareOpInterfaceMethods]> { let input = (ins OneFlow_Tensor:$x ); @@ -4598,7 +4598,7 @@ def OneFlow_DigammaOp : OneFlow_BaseOp<"digamma", [NoSideEffect, DeclareOpInterf let has_data_type_infer_fn = 1; } -def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoSideEffect, DeclareOpInterfaceMethods]> { +def OneFlow_DigammaGradOp : OneFlow_BaseOp<"digamma_grad", [NoMemoryEffect, DeclareOpInterfaceMethods]> { let input = (ins OneFlow_Tensor:$x, OneFlow_Tensor:$dy