[libclc] Optimize isfpclass-like CLC builtins (#124145)

frasercrmck · web-flow · commit a8c82d5fde1d · 2025-01-28T16:23:52.000Z
The builtins we were using to implement __clc_is(finite|inf|nan|normal)
-- __builtin_isfinite, etc. -- don't take vector types so we were
previously scalarizing. The __builtin_isfpclass builtin does take vector
types and thus allows us to keep things in vectors.

There is no change in codegen to the scalar versions of any of these
builtins.
diff --git a/libclc/clc/include/clc/relational/relational.h b/libclc/clc/include/clc/relational/relational.h
@@ -6,63 +6,6 @@
  * when the result is true.
  */
 
-#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME,  \
-                                            ARG_TYPE)                          \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
-    return BUILTIN_NAME(x);                                                    \
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE)        \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
-    return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} !=            \
-                      (RET_TYPE)0);                                            \
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE)        \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
-    return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1),               \
-                                 FUNCTION(x.s2)} != (RET_TYPE)0);              \
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE)        \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
-    return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1),               \
-                                 FUNCTION(x.s2),                               \
-                                 FUNCTION(x.s3)} != (RET_TYPE)0);              \
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE)        \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
-    return (                                                                   \
-        RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),   \
-                             FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5),   \
-                             FUNCTION(x.s6), FUNCTION(x.s7)} != (RET_TYPE)0);  \
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE)       \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
-    return (                                                                   \
-        RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),   \
-                             FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5),   \
-                             FUNCTION(x.s6), FUNCTION(x.s7), FUNCTION(x.s8),   \
-                             FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb),   \
-                             FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se),   \
-                             FUNCTION(x.sf)} != (RET_TYPE)0);                  \
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)     \
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2)        \
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3)        \
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4)        \
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8)        \
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16)
-
-#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION,     \
-                                     ARG_TYPE)                                 \
-  _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION,    \
-                                      ARG_TYPE)                                \
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)
-
 #define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
                                              ARG1_TYPE, ARG2_TYPE)             \
   _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) {         \
@@ -89,4 +32,29 @@
     return _CLC_RELATIONAL_OP(x, y);                                           \
   }
 
+#define fcNan (__FPCLASS_SNAN | __FPCLASS_QNAN)
+#define fcInf (__FPCLASS_POSINF | __FPCLASS_NEGINF)
+#define fcNormal (__FPCLASS_POSNORMAL | __FPCLASS_NEGNORMAL)
+#define fcPosFinite                                                            \
+  (__FPCLASS_POSNORMAL | __FPCLASS_POSSUBNORMAL | __FPCLASS_POSZERO)
+#define fcNegFinite                                                            \
+  (__FPCLASS_NEGNORMAL | __FPCLASS_NEGSUBNORMAL | __FPCLASS_NEGZERO)
+#define fcFinite (fcPosFinite | fcNegFinite)
+
+#define _CLC_DEFINE_ISFPCLASS_VEC(RET_TYPE, FUNCTION, MASK, ARG_TYPE)          \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
+    return (RET_TYPE)(__builtin_isfpclass(x, (MASK)) != (RET_TYPE)0);          \
+  }
+
+#define _CLC_DEFINE_ISFPCLASS(RET_TYPE, VEC_RET_TYPE, FUNCTION, MASK,          \
+                              ARG_TYPE)                                        \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
+    return __builtin_isfpclass(x, (MASK));                                     \
+  }                                                                            \
+  _CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##2, FUNCTION, MASK, ARG_TYPE##2)      \
+  _CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##3, FUNCTION, MASK, ARG_TYPE##3)      \
+  _CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##4, FUNCTION, MASK, ARG_TYPE##4)      \
+  _CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##8, FUNCTION, MASK, ARG_TYPE##8)      \
+  _CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##16, FUNCTION, MASK, ARG_TYPE##16)
+
 #endif // __CLC_RELATIONAL_RELATIONAL_H__
diff --git a/libclc/clc/lib/generic/relational/clc_isfinite.cl b/libclc/clc/lib/generic/relational/clc_isfinite.cl
@@ -1,31 +1,24 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isfinite, __builtin_isfinite, float)
+_CLC_DEFINE_ISFPCLASS(int, int, __clc_isfinite, fcFinite, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_isfinite(double) returns an int, but the vector
 // versions return long.
-_CLC_DEF _CLC_OVERLOAD int __clc_isfinite(double x) {
-  return __builtin_isfinite(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isfinite, double)
+_CLC_DEFINE_ISFPCLASS(int, long, __clc_isfinite, fcFinite, double)
 
 #endif
+
 #ifdef cl_khr_fp16
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 // The scalar version of __clc_isfinite(half) returns an int, but the vector
 // versions return short.
-_CLC_DEF _CLC_OVERLOAD int __clc_isfinite(half x) {
-  return __builtin_isfinite(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isfinite, half)
+_CLC_DEFINE_ISFPCLASS(int, short, __clc_isfinite, fcFinite, half)
 
 #endif
diff --git a/libclc/clc/lib/generic/relational/clc_isinf.cl b/libclc/clc/lib/generic/relational/clc_isinf.cl
@@ -1,17 +1,16 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isinf, __builtin_isinf, float)
+_CLC_DEFINE_ISFPCLASS(int, int, __clc_isinf, fcInf, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_isinf(double) returns an int, but the vector
 // versions return long.
-_CLC_DEF _CLC_OVERLOAD int __clc_isinf(double x) { return __builtin_isinf(x); }
+_CLC_DEFINE_ISFPCLASS(int, long, __clc_isinf, fcInf, double)
 
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isinf, double)
 #endif
 
 #ifdef cl_khr_fp16
@@ -20,7 +19,6 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isinf, double)
 
 // The scalar version of __clc_isinf(half) returns an int, but the vector
 // versions return short.
-_CLC_DEF _CLC_OVERLOAD int __clc_isinf(half x) { return __builtin_isinf(x); }
+_CLC_DEFINE_ISFPCLASS(int, short, __clc_isinf, fcInf, half)
 
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isinf, half)
 #endif
diff --git a/libclc/clc/lib/generic/relational/clc_isnan.cl b/libclc/clc/lib/generic/relational/clc_isnan.cl
@@ -1,17 +1,15 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnan, __builtin_isnan, float)
+_CLC_DEFINE_ISFPCLASS(int, int, __clc_isnan, fcNan, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_isnan(double) returns an int, but the vector
-// versions return long.
-_CLC_DEF _CLC_OVERLOAD int __clc_isnan(double x) { return __builtin_isnan(x); }
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnan, double)
+// versions return a long.
+_CLC_DEFINE_ISFPCLASS(int, long, __clc_isnan, fcNan, double)
 
 #endif
 
@@ -20,9 +18,7 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnan, double)
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 // The scalar version of __clc_isnan(half) returns an int, but the vector
-// versions return short.
-_CLC_DEF _CLC_OVERLOAD int __clc_isnan(half x) { return __builtin_isnan(x); }
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnan, half)
+// versions return a short.
+_CLC_DEFINE_ISFPCLASS(int, short, __clc_isnan, fcNan, half)
 
 #endif
diff --git a/libclc/clc/lib/generic/relational/clc_isnormal.cl b/libclc/clc/lib/generic/relational/clc_isnormal.cl
@@ -1,31 +1,24 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnormal, __builtin_isnormal, float)
+_CLC_DEFINE_ISFPCLASS(int, int, __clc_isnormal, fcNormal, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_isnormal(double) returns an int, but the vector
 // versions return long.
-_CLC_DEF _CLC_OVERLOAD int __clc_isnormal(double x) {
-  return __builtin_isnormal(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnormal, double)
+_CLC_DEFINE_ISFPCLASS(int, long, __clc_isnormal, fcNormal, double)
 
 #endif
+
 #ifdef cl_khr_fp16
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 // The scalar version of __clc_isnormal(half) returns an int, but the vector
 // versions return short.
-_CLC_DEF _CLC_OVERLOAD int __clc_isnormal(half x) {
-  return __builtin_isnormal(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnormal, half)
+_CLC_DEFINE_ISFPCLASS(int, short, __clc_isnormal, fcNormal, half)
 
 #endif
diff --git a/libclc/clc/lib/generic/relational/clc_signbit.cl b/libclc/clc/lib/generic/relational/clc_signbit.cl
@@ -1,7 +1,56 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_signbit, __builtin_signbitf, float)
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE)        \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
+    return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} !=            \
+                      (RET_TYPE)0);                                            \
+  }
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE)        \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
+    return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1),               \
+                                 FUNCTION(x.s2)} != (RET_TYPE)0);              \
+  }
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE)        \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
+    return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1),               \
+                                 FUNCTION(x.s2),                               \
+                                 FUNCTION(x.s3)} != (RET_TYPE)0);              \
+  }
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE)        \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
+    return (                                                                   \
+        RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),   \
+                             FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5),   \
+                             FUNCTION(x.s6), FUNCTION(x.s7)} != (RET_TYPE)0);  \
+  }
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE)       \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) {                       \
+    return (                                                                   \
+        RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),   \
+                             FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5),   \
+                             FUNCTION(x.s6), FUNCTION(x.s7), FUNCTION(x.s8),   \
+                             FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb),   \
+                             FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se),   \
+                             FUNCTION(x.sf)} != (RET_TYPE)0);                  \
+  }
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)     \
+  _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2)        \
+  _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3)        \
+  _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4)        \
+  _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8)        \
+  _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16)
+
+_CLC_DEF _CLC_OVERLOAD int __clc_signbit(float x) {
+  return __builtin_signbitf(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(int, __clc_signbit, float)
 
 #ifdef cl_khr_fp64