diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 088d90f505f9a..00549976d9514 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -236,6 +236,10 @@ class DefaultIntArgument : IntArgument { int Default = default; } +class DefaultUnsignedArgument : UnsignedArgument { + int Default = default; +} + // This argument is more complex, it includes the enumerator type name, // a list of strings to accept, and a list of enumerators to map them to. class EnumArgument values, @@ -2413,13 +2417,16 @@ def NoDeref : TypeAttr { let Documentation = [NoDerefDocs]; } +// Default arguments in ReqWorkGroupSize can be used only with +// intel::reqd_work_group_size spelling. def ReqdWorkGroupSize : InheritableAttr { let Spellings = [GNU<"reqd_work_group_size">, - CXX11<"cl","reqd_work_group_size">]; - let Args = [UnsignedArgument<"XDim">, UnsignedArgument<"YDim">, - UnsignedArgument<"ZDim">]; + CXX11<"intel","reqd_work_group_size">, + CXX11<"cl","reqd_work_group_size">]; + let Args = [UnsignedArgument<"XDim">, DefaultUnsignedArgument<"YDim", 1>, + DefaultUnsignedArgument<"ZDim", 1>]; let Subjects = SubjectList<[Function], ErrorDiag>; - let Documentation = [Undocumented]; + let Documentation = [ReqdWorkGroupSizeAttrDocs]; } def WorkGroupSizeHint : InheritableAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 910c6b2bee61b..23bb435017d13 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1983,6 +1983,36 @@ device kernel, the attribute is ignored and it is not propagated to a kernel. }]; } +def ReqdWorkGroupSizeAttrDocs : Documentation { + let Category = DocCatFunction; + let Heading = "reqd_work_group_size"; + let Content = [{ +This attribute is documented by both OpenCL and SYCL standards +and allows to specify exact *local_work_size* which must be used as +argument to **clEnqueueNDRangeKernel** (in OpenCL) or to +**parallel_for** in SYCL. This allows the compiler to optimize the +generated code appropriately for the kernel to which attribute is applied. + +While semantic of this attribute is the same between OpenCL and SYCL, +spelling is a bit different: + +SYCL 1.2.1 describes ``[[cl::reqd_work_group_size(X, Y, Z)]]`` spelling: this +attribute is legal on device functions and is propagated down to any caller of +those device functions, such that the kernel attributes are the sum of all +attributes of all device functions called in this kernel. +See section 6.7 Attributes for more details. + +As Intel extension, ``[[intel::reqd_work_group_size(X, Y, Z)]]`` spelling is allowed +which features optional arguments `Y` and `Z`, those simplifies its usage if +only 1- or 2-dimensional ND-range is assumed by a programmer. These arguments +defaults to ``1``. + +In OpenCL C, this attribute is available in GNU spelling +(``__attribute__((reqd_work_group_size(X, Y, Z)))``), see section +6.7.2 Optional Attribute Qualifiers of OpenCL 1.2 specification for details. + }]; +} + def SYCLIntelMaxWorkGroupSizeAttrDocs : Documentation { let Category = DocCatFunction; let Heading = "max_work_group_size (IntelFPGA)"; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index f71b8327224aa..b6394311b74ab 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2922,14 +2922,22 @@ static void handleWorkGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) { return; uint32_t WGSize[3]; + if (AL.getKind() == ParsedAttr::AT_ReqdWorkGroupSize && + AL.getAttributeSpellingListIndex() == + ReqdWorkGroupSizeAttr::CXX11_intel_reqd_work_group_size) { + WGSize[1] = ReqdWorkGroupSizeAttr::DefaultYDim; + WGSize[2] = ReqdWorkGroupSizeAttr::DefaultZDim; + } else if (!checkAttributeNumArgs(S, AL, 3)) + return; + for (unsigned i = 0; i < 3; ++i) { - const Expr *E = AL.getArgAsExpr(i); - if (!checkUInt32Argument(S, AL, E, WGSize[i], i, + if (i < AL.getNumArgs() && + !checkUInt32Argument(S, AL, AL.getArgAsExpr(i), WGSize[i], i, /*StrictlyUnsigned=*/true)) return; if (WGSize[i] == 0) { S.Diag(AL.getLoc(), diag::err_attribute_argument_is_zero) - << AL << E->getSourceRange(); + << AL << AL.getArgAsExpr(i)->getSourceRange(); return; } } diff --git a/clang/test/SemaSYCL/intel-reqd-work-group-size.cpp b/clang/test/SemaSYCL/intel-reqd-work-group-size.cpp new file mode 100644 index 0000000000000..361b3d083c0ef --- /dev/null +++ b/clang/test/SemaSYCL/intel-reqd-work-group-size.cpp @@ -0,0 +1,136 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -DTRIGGER_ERROR %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -verify %s + +#ifndef __SYCL_DEVICE_ONLY__ +// expected-no-diagnostics +class Functor { +public: + [[intel::reqd_work_group_size(4)]] void operator()() {} +}; + +template +void kernel(Func kernelFunc) { + kernelFunc(); +} + +void bar() { + Functor f; + kernel(f); +} +#else +[[intel::reqd_work_group_size(4)]] void f4x1x1() {} // expected-note {{conflicting attribute is here}} +// expected-note@-1 {{conflicting attribute is here}} +[[intel::reqd_work_group_size(32)]] void f32x1x1() {} // expected-note {{conflicting attribute is here}} + +[[intel::reqd_work_group_size(16)]] void f16x1x1() {} // expected-note {{conflicting attribute is here}} +[[intel::reqd_work_group_size(16, 16)]] void f16x16x1() {} // expected-note {{conflicting attribute is here}} + +[[intel::reqd_work_group_size(32, 32)]] void f32x32x1() {} // expected-note {{conflicting attribute is here}} +[[intel::reqd_work_group_size(32, 32, 32)]] void f32x32x32() {} // expected-note {{conflicting attribute is here}} + +#ifdef TRIGGER_ERROR +class Functor32 { +public: + [[cl::reqd_work_group_size(32)]] void operator()() {} // expected-error {{'reqd_work_group_size' attribute requires exactly 3 arguments}} +}; +class Functor33 { +public: + [[intel::reqd_work_group_size(32, -4)]] void operator()() {} // expected-error {{'reqd_work_group_size' attribute requires a non-negative integral compile time constant expression}} +}; +#endif // TRIGGER_ERROR + +class Functor16 { +public: + [[intel::reqd_work_group_size(16)]] void operator()() {} +}; + +class Functor64 { +public: + [[intel::reqd_work_group_size(64, 64)]] void operator()() {} +}; + +class Functor16x16x16 { +public: + [[intel::reqd_work_group_size(16, 16, 16)]] void operator()() {} +}; + +class Functor8 { // expected-error {{conflicting attributes applied to a SYCL kernel}} +public: + [[intel::reqd_work_group_size(8)]] void operator()() { // expected-note {{conflicting attribute is here}} + f4x1x1(); + } +}; + +class Functor { +public: + void operator()() { + f4x1x1(); + } +}; + +class FunctorAttr { +public: + __attribute__((reqd_work_group_size(128, 128, 128))) void operator()() {} +}; + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +void bar() { + Functor16 f16; + kernel(f16); + + Functor f; + kernel(f); + + Functor16x16x16 f16x16x16; + kernel(f16x16x16); + + FunctorAttr fattr; + kernel(fattr); + + kernel([]() [[intel::reqd_work_group_size(32, 32, 32)]] { + f32x32x32(); + }); + +#ifdef TRIGGER_ERROR + Functor8 f8; + kernel(f8); + + kernel([]() { // expected-error {{conflicting attributes applied to a SYCL kernel}} + f4x1x1(); + f32x1x1(); + }); + + kernel([]() { // expected-error {{conflicting attributes applied to a SYCL kernel}} + f16x1x1(); + f16x16x1(); + }); + + kernel([]() { // expected-error {{conflicting attributes applied to a SYCL kernel}} + f32x32x32(); + f32x32x1(); + }); + + // expected-error@+1 {{expected variable name or 'this' in lambda capture list}} + kernel([[intel::reqd_work_group_size(32, 32, 32)]][]() { + f32x32x32(); + }); + +#endif // TRIGGER_ERROR +} + +// CHECK: FunctionDecl {{.*}} {{.*}}kernel_name1 +// CHECK: ReqdWorkGroupSizeAttr {{.*}} 1 1 16 +// CHECK: FunctionDecl {{.*}} {{.*}}kernel_name2 +// CHECK: ReqdWorkGroupSizeAttr {{.*}} 1 1 4 +// CHECK: FunctionDecl {{.*}} {{.*}}kernel_name3 +// CHECK: ReqdWorkGroupSizeAttr {{.*}} 16 16 16 +// CHECK: FunctionDecl {{.*}} {{.*}}kernel_name4 +// CHECK: ReqdWorkGroupSizeAttr {{.*}} 128 128 128 +// CHECK: FunctionDecl {{.*}} {{.*}}kernel_name5 +// CHECK: ReqdWorkGroupSizeAttr {{.*}} 32 32 32 +#endif // __SYCL_DEVICE_ONLY__ diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index ce95d78d7db03..37594e7b679c9 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -1312,6 +1312,9 @@ createArgument(const Record &Arg, StringRef Attr, Ptr = std::make_unique(Arg, Attr); else if (ArgName == "UnsignedArgument") Ptr = std::make_unique(Arg, Attr, "unsigned"); + else if (ArgName == "DefaultUnsignedArgument") + Ptr = std::make_unique(Arg, Attr, "unsigned", + Arg.getValueAsInt("Default")); else if (ArgName == "VariadicUnsignedArgument") Ptr = std::make_unique(Arg, Attr, "unsigned"); else if (ArgName == "VariadicStringArgument") @@ -2329,7 +2332,7 @@ static void emitAttributes(RecordKeeper &Records, raw_ostream &OS, SemanticSpellingMap SemanticToSyntacticMap; std::string SpellingEnum; - if (!ElideSpelling) + if (Spellings.size() > 1) SpellingEnum = CreateSemanticSpellings(Spellings, SemanticToSyntacticMap); if (Header) OS << SpellingEnum;