Skip to content

Commit a8c82d5

Browse files
authored Jan 28, 2025
[libclc] Optimize isfpclass-like CLC builtins (#124145)
The builtins we were using to implement __clc_is(finite|inf|nan|normal) -- __builtin_isfinite, etc. -- don't take vector types so we were previously scalarizing. The __builtin_isfpclass builtin does take vector types and thus allows us to keep things in vectors. There is no change in codegen to the scalar versions of any of these builtins.
1 parent 8ce0d05 commit a8c82d5

File tree

6 files changed

+91
-94
lines changed

6 files changed

+91
-94
lines changed
 

‎libclc/clc/include/clc/relational/relational.h

+25-57
Original file line number | Diff line number | Diff line change
@@ -6,63 +6,6 @@
66
* when the result is true.
77
*/
88

9-
#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \
10-
ARG_TYPE) \
11-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
12-
return BUILTIN_NAME(x); \
13-
}
14-
15-
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
16-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
17-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != \
18-
(RET_TYPE)0); \
19-
}
20-
21-
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
22-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
23-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \
24-
FUNCTION(x.s2)} != (RET_TYPE)0); \
25-
}
26-
27-
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
28-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
29-
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \
30-
FUNCTION(x.s2), \
31-
FUNCTION(x.s3)} != (RET_TYPE)0); \
32-
}
33-
34-
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
35-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
36-
return ( \
37-
RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
38-
FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \
39-
FUNCTION(x.s6), FUNCTION(x.s7)} != (RET_TYPE)0); \
40-
}
41-
42-
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
43-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
44-
return ( \
45-
RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
46-
FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \
47-
FUNCTION(x.s6), FUNCTION(x.s7), FUNCTION(x.s8), \
48-
FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
49-
FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), \
50-
FUNCTION(x.sf)} != (RET_TYPE)0); \
51-
}
52-
53-
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \
54-
_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
55-
_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
56-
_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
57-
_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
58-
_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16)
59-
60-
#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
61-
ARG_TYPE) \
62-
_CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
63-
ARG_TYPE) \
64-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)
65-
669
#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
6710
ARG1_TYPE, ARG2_TYPE) \
6811
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
@@ -89,4 +32,29 @@
8932
return _CLC_RELATIONAL_OP(x, y); \
9033
}
9134

35+
#define fcNan (__FPCLASS_SNAN | __FPCLASS_QNAN)
36+
#define fcInf (__FPCLASS_POSINF | __FPCLASS_NEGINF)
37+
#define fcNormal (__FPCLASS_POSNORMAL | __FPCLASS_NEGNORMAL)
38+
#define fcPosFinite \
39+
(__FPCLASS_POSNORMAL | __FPCLASS_POSSUBNORMAL | __FPCLASS_POSZERO)
40+
#define fcNegFinite \
41+
(__FPCLASS_NEGNORMAL | __FPCLASS_NEGSUBNORMAL | __FPCLASS_NEGZERO)
42+
#define fcFinite (fcPosFinite | fcNegFinite)
43+
44+
#define _CLC_DEFINE_ISFPCLASS_VEC(RET_TYPE, FUNCTION, MASK, ARG_TYPE) \
45+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
46+
return (RET_TYPE)(__builtin_isfpclass(x, (MASK)) != (RET_TYPE)0); \
47+
}
48+
49+
#define _CLC_DEFINE_ISFPCLASS(RET_TYPE, VEC_RET_TYPE, FUNCTION, MASK, \
50+
ARG_TYPE) \
51+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
52+
return __builtin_isfpclass(x, (MASK)); \
53+
} \
54+
_CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##2, FUNCTION, MASK, ARG_TYPE##2) \
55+
_CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##3, FUNCTION, MASK, ARG_TYPE##3) \
56+
_CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##4, FUNCTION, MASK, ARG_TYPE##4) \
57+
_CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##8, FUNCTION, MASK, ARG_TYPE##8) \
58+
_CLC_DEFINE_ISFPCLASS_VEC(VEC_RET_TYPE##16, FUNCTION, MASK, ARG_TYPE##16)
59+
9260
#endif // __CLC_RELATIONAL_RELATIONAL_H__
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,24 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isfinite, __builtin_isfinite, float)
4+
_CLC_DEFINE_ISFPCLASS(int, int, __clc_isfinite, fcFinite, float)
55

66
#ifdef cl_khr_fp64
77

88
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
99

1010
// The scalar version of __clc_isfinite(double) returns an int, but the vector
1111
// versions return long.
12-
_CLC_DEF _CLC_OVERLOAD int __clc_isfinite(double x) {
13-
return __builtin_isfinite(x);
14-
}
15-
16-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isfinite, double)
12+
_CLC_DEFINE_ISFPCLASS(int, long, __clc_isfinite, fcFinite, double)
1713

1814
#endif
15+
1916
#ifdef cl_khr_fp16
2017

2118
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2219

2320
// The scalar version of __clc_isfinite(half) returns an int, but the vector
2421
// versions return short.
25-
_CLC_DEF _CLC_OVERLOAD int __clc_isfinite(half x) {
26-
return __builtin_isfinite(x);
27-
}
28-
29-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isfinite, half)
22+
_CLC_DEFINE_ISFPCLASS(int, short, __clc_isfinite, fcFinite, half)
3023

3124
#endif
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,16 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isinf, __builtin_isinf, float)
4+
_CLC_DEFINE_ISFPCLASS(int, int, __clc_isinf, fcInf, float)
55

66
#ifdef cl_khr_fp64
77

88
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
99

1010
// The scalar version of __clc_isinf(double) returns an int, but the vector
1111
// versions return long.
12-
_CLC_DEF _CLC_OVERLOAD int __clc_isinf(double x) { return __builtin_isinf(x); }
12+
_CLC_DEFINE_ISFPCLASS(int, long, __clc_isinf, fcInf, double)
1313

14-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isinf, double)
1514
#endif
1615

1716
#ifdef cl_khr_fp16
@@ -20,7 +19,6 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isinf, double)
2019

2120
// The scalar version of __clc_isinf(half) returns an int, but the vector
2221
// versions return short.
23-
_CLC_DEF _CLC_OVERLOAD int __clc_isinf(half x) { return __builtin_isinf(x); }
22+
_CLC_DEFINE_ISFPCLASS(int, short, __clc_isinf, fcInf, half)
2423

25-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isinf, half)
2624
#endif
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,15 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnan, __builtin_isnan, float)
4+
_CLC_DEFINE_ISFPCLASS(int, int, __clc_isnan, fcNan, float)
55

66
#ifdef cl_khr_fp64
77

88
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
99

1010
// The scalar version of __clc_isnan(double) returns an int, but the vector
11-
// versions return long.
12-
_CLC_DEF _CLC_OVERLOAD int __clc_isnan(double x) { return __builtin_isnan(x); }
13-
14-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnan, double)
11+
// versions return a long.
12+
_CLC_DEFINE_ISFPCLASS(int, long, __clc_isnan, fcNan, double)
1513

1614
#endif
1715

@@ -20,9 +18,7 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnan, double)
2018
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2119

2220
// The scalar version of __clc_isnan(half) returns an int, but the vector
23-
// versions return short.
24-
_CLC_DEF _CLC_OVERLOAD int __clc_isnan(half x) { return __builtin_isnan(x); }
25-
26-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnan, half)
21+
// versions return a short.
22+
_CLC_DEFINE_ISFPCLASS(int, short, __clc_isnan, fcNan, half)
2723

2824
#endif
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,24 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnormal, __builtin_isnormal, float)
4+
_CLC_DEFINE_ISFPCLASS(int, int, __clc_isnormal, fcNormal, float)
55

66
#ifdef cl_khr_fp64
77

88
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
99

1010
// The scalar version of __clc_isnormal(double) returns an int, but the vector
1111
// versions return long.
12-
_CLC_DEF _CLC_OVERLOAD int __clc_isnormal(double x) {
13-
return __builtin_isnormal(x);
14-
}
15-
16-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnormal, double)
12+
_CLC_DEFINE_ISFPCLASS(int, long, __clc_isnormal, fcNormal, double)
1713

1814
#endif
15+
1916
#ifdef cl_khr_fp16
2017

2118
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2219

2320
// The scalar version of __clc_isnormal(half) returns an int, but the vector
2421
// versions return short.
25-
_CLC_DEF _CLC_OVERLOAD int __clc_isnormal(half x) {
26-
return __builtin_isnormal(x);
27-
}
28-
29-
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnormal, half)
22+
_CLC_DEFINE_ISFPCLASS(int, short, __clc_isnormal, fcNormal, half)
3023

3124
#endif

‎libclc/clc/lib/generic/relational/clc_signbit.cl

+50-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,56 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_signbit, __builtin_signbitf, float)
4+
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
5+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
6+
return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != \
7+
(RET_TYPE)0); \
8+
}
9+
10+
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
11+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
12+
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \
13+
FUNCTION(x.s2)} != (RET_TYPE)0); \
14+
}
15+
16+
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
17+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
18+
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \
19+
FUNCTION(x.s2), \
20+
FUNCTION(x.s3)} != (RET_TYPE)0); \
21+
}
22+
23+
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
24+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
25+
return ( \
26+
RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
27+
FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \
28+
FUNCTION(x.s6), FUNCTION(x.s7)} != (RET_TYPE)0); \
29+
}
30+
31+
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
32+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
33+
return ( \
34+
RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
35+
FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \
36+
FUNCTION(x.s6), FUNCTION(x.s7), FUNCTION(x.s8), \
37+
FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
38+
FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), \
39+
FUNCTION(x.sf)} != (RET_TYPE)0); \
40+
}
41+
42+
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \
43+
_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
44+
_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
45+
_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
46+
_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
47+
_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16)
48+
49+
_CLC_DEF _CLC_OVERLOAD int __clc_signbit(float x) {
50+
return __builtin_signbitf(x);
51+
}
52+
53+
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(int, __clc_signbit, float)
554

655
#ifdef cl_khr_fp64
756

0 commit comments

Comments
 (0)
Please sign in to comment.