diff --git a/sycl/include/CL/sycl/detail/defines.hpp b/sycl/include/CL/sycl/detail/defines.hpp index 29dc311fe6808..c778ce9603fc9 100644 --- a/sycl/include/CL/sycl/detail/defines.hpp +++ b/sycl/include/CL/sycl/detail/defines.hpp @@ -33,11 +33,9 @@ #endif #if __cplusplus >= 201402 -#define __SYCL_DEPRECATED__ \ - [[deprecated("Replaced by in_order queue property")]] +#define __SYCL_DEPRECATED__(message) [[deprecated(message)]] #elif !defined _MSC_VER -#define __SYCL_DEPRECATED__ \ - __attribute__((deprecated("Replaced by in_order queue property"))) +#define __SYCL_DEPRECATED__(message) __attribute__((deprecated(message))) #else -#define __SYCL_DEPRECATED__ +#define __SYCL_DEPRECATED__(message) #endif diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index 0f437f13e00ed..e3c783c7bf65d 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -16,25 +16,46 @@ #ifdef __SYCL_DEVICE_ONLY__ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace intel { +struct sub_group; +} // namespace intel namespace detail { namespace spirv { +template struct group_scope {}; + +template struct group_scope> { + static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup; +}; + +template <> struct group_scope { + static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup; +}; + +template bool GroupAll(bool pred) { + return __spirv_GroupAll(group_scope::value, pred); +} + +template bool GroupAny(bool pred) { + return __spirv_GroupAny(group_scope::value, pred); +} + // Broadcast with scalar local index -template <__spv::Scope::Flag S, typename T, typename IdT> +template detail::enable_if_t::value, T> GroupBroadcast(T x, IdT local_id) { using OCLT = detail::ConvertToOpenCLType_t; using OCLIdT = detail::ConvertToOpenCLType_t; OCLT ocl_x = detail::convertDataToType(x); OCLIdT ocl_id = detail::convertDataToType(local_id); - return __spirv_GroupBroadcast(S, ocl_x, ocl_id); + return __spirv_GroupBroadcast(group_scope::value, ocl_x, ocl_id); } // Broadcast with vector local index -template <__spv::Scope::Flag S, typename T, int Dimensions> +template T GroupBroadcast(T x, id local_id) { if (Dimensions == 1) { - return GroupBroadcast(x, local_id[0]); + return GroupBroadcast(x, local_id[0]); } using IdT = vec; using OCLT = detail::ConvertToOpenCLType_t; @@ -45,7 +66,7 @@ T GroupBroadcast(T x, id local_id) { } OCLT ocl_x = detail::convertDataToType(x); OCLIdT ocl_id = detail::convertDataToType(vec_id); - return __spirv_GroupBroadcast(S, ocl_x, ocl_id); + return __spirv_GroupBroadcast(group_scope::value, ocl_x, ocl_id); } } // namespace spirv diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index ad8fa67313d91..f4b59a2b4068e 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ __SYCL_INLINE_NAMESPACE(cl) { @@ -30,6 +31,32 @@ template <> inline size_t get_local_linear_range>(group<2> g) { template <> inline size_t get_local_linear_range>(group<3> g) { return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2); } +template <> +inline size_t get_local_linear_range(intel::sub_group g) { + return g.get_local_range()[0]; +} + +template +typename Group::linear_id_type get_local_linear_id(Group g); + +#ifdef __SYCL_DEVICE_ONLY__ +#define __SYCL_GROUP_GET_LOCAL_LINEAR_ID(D) \ + template <> \ + group::linear_id_type get_local_linear_id>(group g) { \ + nd_item it = cl::sycl::detail::Builder::getNDItem(); \ + return it.get_local_linear_id(); \ + } +__SYCL_GROUP_GET_LOCAL_LINEAR_ID(1); +__SYCL_GROUP_GET_LOCAL_LINEAR_ID(2); +__SYCL_GROUP_GET_LOCAL_LINEAR_ID(3); +#undef __SYCL_GROUP_GET_LOCAL_LINEAR_ID +#endif // __SYCL_DEVICE_ONLY__ + +template <> +inline intel::sub_group::linear_id_type +get_local_linear_id(intel::sub_group g) { + return g.get_local_id()[0]; +} template id linear_id_to_id(range, size_t linear_id); @@ -55,6 +82,15 @@ template struct is_group : std::false_type {}; template struct is_group> : std::true_type {}; +template struct is_sub_group : std::false_type {}; + +template <> struct is_sub_group : std::true_type {}; + +template +struct is_generic_group + : std::integral_constant::value || is_sub_group::value> {}; + template struct identity {}; template struct identity> { @@ -72,9 +108,7 @@ template struct identity> { template Function for_each(Group g, Ptr first, Ptr last, Function f) { #ifdef __SYCL_DEVICE_ONLY__ - nd_item it = - cl::sycl::detail::Builder::getNDItem(); - ptrdiff_t offset = it.get_local_linear_id(); + ptrdiff_t offset = detail::get_local_linear_id(g); ptrdiff_t stride = detail::get_local_linear_range(g); for (Ptr p = first + offset; p < last; p += stride) { f(*p); @@ -103,10 +137,11 @@ using EnableIfIsPointer = cl::sycl::detail::enable_if_t::value, T>; template bool all_of(Group g, bool pred) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_GroupAll(__spv::Scope::Workgroup, pred); + return detail::spirv::GroupAll(pred); #else throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); @@ -115,17 +150,19 @@ template bool all_of(Group g, bool pred) { template bool all_of(Group g, T x, Predicate pred) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); return all_of(g, pred(x)); } template EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, Predicate pred) { + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); bool partial = true; detail::for_each(g, first, last, [&](const typename Ptr::element_type &x) { partial &= pred(x); @@ -138,10 +175,11 @@ EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, } template bool any_of(Group g, bool pred) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_GroupAny(__spv::Scope::Workgroup, pred); + return detail::spirv::GroupAny(pred); #else throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); @@ -150,8 +188,9 @@ template bool any_of(Group g, bool pred) { template bool any_of(Group g, T x, Predicate pred) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); return any_of(g, pred(x)); } @@ -159,8 +198,9 @@ template EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, Predicate pred) { #ifdef __SYCL_DEVICE_ONLY__ - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); bool partial = false; detail::for_each(g, first, last, [&](const typename Ptr::element_type &x) { partial |= pred(x); @@ -173,10 +213,11 @@ EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, } template bool none_of(Group g, bool pred) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ - return __spirv_GroupAll(__spv::Scope::Workgroup, not pred); + return detail::spirv::GroupAll(not pred); #else throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); @@ -185,8 +226,9 @@ template bool none_of(Group g, bool pred) { template bool none_of(Group g, T x, Predicate pred) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); return none_of(g, pred(x)); } @@ -194,8 +236,9 @@ template EnableIfIsPointer none_of(Group g, Ptr first, Ptr last, Predicate pred) { #ifdef __SYCL_DEVICE_ONLY__ - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); return not any_of(g, first, last, pred); #else throw runtime_error("Group algorithms are not supported on host device.", @@ -206,10 +249,11 @@ EnableIfIsPointer none_of(Group g, Ptr first, Ptr last, template EnableIfIsScalarArithmetic broadcast(Group g, T x, typename Group::id_type local_id) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ - return detail::spirv::GroupBroadcast<__spv::Scope::Workgroup>(x, local_id); + return detail::spirv::GroupBroadcast(x, local_id); #else throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); @@ -219,8 +263,9 @@ EnableIfIsScalarArithmetic broadcast(Group g, T x, template EnableIfIsVectorArithmetic broadcast(Group g, T x, typename Group::id_type local_id) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -236,8 +281,9 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x, template EnableIfIsScalarArithmetic broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return broadcast( g, x, detail::linear_id_to_id(g.get_local_range(), linear_local_id)); @@ -250,8 +296,9 @@ broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { template EnableIfIsVectorArithmetic broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -266,8 +313,9 @@ broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { template EnableIfIsScalarArithmetic broadcast(Group g, T x) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return broadcast(g, x, 0); #else @@ -278,8 +326,9 @@ EnableIfIsScalarArithmetic broadcast(Group g, T x) { template EnableIfIsVectorArithmetic broadcast(Group g, T x) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -294,14 +343,18 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x) { template EnableIfIsScalarArithmetic reduce(Group g, T x, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision static_assert( - std::is_same::value, + std::is_same::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ return detail::calc( + detail::spirv::group_scope::value>( typename detail::GroupOpTag::type(), x, binary_op); #else throw runtime_error("Group algorithms are not supported on host device.", @@ -311,11 +364,15 @@ EnableIfIsScalarArithmetic reduce(Group g, T x, BinaryOperation binary_op) { template EnableIfIsVectorArithmetic reduce(Group g, T x, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision static_assert( std::is_same::value, + typename T::element_type>::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match reduction accumulation type."); T result; for (int s = 0; s < x.get_size(); ++s) { @@ -327,10 +384,14 @@ EnableIfIsVectorArithmetic reduce(Group g, T x, BinaryOperation binary_op) { template EnableIfIsScalarArithmetic reduce(Group g, V x, T init, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision static_assert( - std::is_same::value, + std::is_same::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ return binary_op(init, reduce(g, x, binary_op)); @@ -343,11 +404,15 @@ EnableIfIsScalarArithmetic reduce(Group g, V x, T init, template EnableIfIsVectorArithmetic reduce(Group g, V x, T init, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision static_assert( std::is_same::value, + typename T::element_type>::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ T result = init; @@ -364,11 +429,15 @@ EnableIfIsVectorArithmetic reduce(Group g, V x, T init, template EnableIfIsPointer reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision static_assert( std::is_same::value, + typename Ptr::element_type>::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ typename Ptr::element_type partial = @@ -386,10 +455,14 @@ reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { template EnableIfIsPointer reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision static_assert( - std::is_same::value, + std::is_same::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ T partial = @@ -407,13 +480,17 @@ EnableIfIsPointer reduce(Group g, Ptr first, Ptr last, T init, template EnableIfIsScalarArithmetic exclusive_scan(Group g, T x, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert(std::is_same::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ return detail::calc( + detail::spirv::group_scope::value>( typename detail::GroupOpTag::type(), x, binary_op); #else throw runtime_error("Group algorithms are not supported on host device.", @@ -424,11 +501,16 @@ EnableIfIsScalarArithmetic exclusive_scan(Group g, T x, template EnableIfIsVectorArithmetic exclusive_scan(Group g, T x, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); T result; for (int s = 0; s < x.get_size(); ++s) { result[s] = exclusive_scan(g, x[s], binary_op); @@ -439,11 +521,16 @@ EnableIfIsVectorArithmetic exclusive_scan(Group g, T x, template EnableIfIsVectorArithmetic exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); T result; for (int s = 0; s < x.get_size(); ++s) { result[s] = exclusive_scan(g, x[s], init[s], binary_op); @@ -454,18 +541,22 @@ EnableIfIsVectorArithmetic exclusive_scan(Group g, V x, T init, template EnableIfIsScalarArithmetic exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert(std::is_same::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ - nd_item it = - detail::Builder::getNDItem(); - if (it.get_local_linear_id() == 0) { + typename Group::linear_id_type local_linear_id = + detail::get_local_linear_id(g); + if (local_linear_id == 0) { x = binary_op(init, x); } T scan = exclusive_scan(g, x, binary_op); - if (it.get_local_linear_id() == 0) { + if (local_linear_id == 0) { scan = init; } return scan; @@ -480,14 +571,17 @@ template exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ - nd_item it = - cl::sycl::detail::Builder::getNDItem(); - ptrdiff_t offset = it.get_local_linear_id(); + ptrdiff_t offset = detail::get_local_linear_id(g); ptrdiff_t stride = detail::get_local_linear_range(g); ptrdiff_t N = last - first; auto roundup = [=](const ptrdiff_t &v, @@ -519,9 +613,13 @@ template exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, BinaryOperation binary_op) { - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); return exclusive_scan( g, first, last, result, detail::identity::value, @@ -531,11 +629,16 @@ EnableIfIsPointer exclusive_scan(Group g, InPtr first, template EnableIfIsVectorArithmetic inclusive_scan(Group g, T x, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); T result; for (int s = 0; s < x.get_size(); ++s) { result[s] = inclusive_scan(g, x[s], binary_op); @@ -546,13 +649,17 @@ EnableIfIsVectorArithmetic inclusive_scan(Group g, T x, template EnableIfIsScalarArithmetic inclusive_scan(Group g, T x, BinaryOperation binary_op) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert(std::is_same::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ return detail::calc( + detail::spirv::group_scope::value>( typename detail::GroupOpTag::type(), x, binary_op); #else throw runtime_error("Group algorithms are not supported on host device.", @@ -563,14 +670,16 @@ EnableIfIsScalarArithmetic inclusive_scan(Group g, T x, template EnableIfIsScalarArithmetic inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert(std::is_same::value || + (std::is_same::value && + std::is_same::value), "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ - nd_item it = - detail::Builder::getNDItem(); - if (it.get_local_linear_id() == 0) { + if (detail::get_local_linear_id(g) == 0) { x = binary_op(init, x); } return inclusive_scan(g, x, binary_op); @@ -583,10 +692,15 @@ inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { template EnableIfIsVectorArithmetic inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); T result; for (int s = 0; s < x.get_size(); ++s) { result[s] = inclusive_scan(g, x[s], binary_op, init[s]); @@ -599,14 +713,17 @@ template inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, BinaryOperation binary_op, T init) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ - nd_item it = - cl::sycl::detail::Builder::getNDItem(); - ptrdiff_t offset = it.get_local_linear_id(); + ptrdiff_t offset = detail::get_local_linear_id(g); ptrdiff_t stride = detail::get_local_linear_range(g); ptrdiff_t N = last - first; auto roundup = [=](const ptrdiff_t &v, @@ -638,21 +755,24 @@ template inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, BinaryOperation binary_op) { - static_assert(std::is_same::value, - "Result type of binary_op must match scan accumulation type."); + // FIXME: Do not special-case for half precision + static_assert( + std::is_same::value || + (std::is_same::value && + std::is_same::value), + "Result type of binary_op must match scan accumulation type."); return inclusive_scan( g, first, last, result, binary_op, detail::identity::value); } template bool leader(Group g) { - static_assert(detail::is_group::value, - "Group algorithms only support the sycl::group class."); + static_assert(detail::is_generic_group::value, + "Group algorithms only support the sycl::group and " + "intel::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ - nd_item it = - cl::sycl::detail::Builder::getNDItem(); - typename Group::linear_id_type linear_id = it.get_local_linear_id(); + typename Group::linear_id_type linear_id = detail::get_local_linear_id(g); return (linear_id == 0); #else throw runtime_error("Group algorithms are not supported on host device.", diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/intel/sub_group.hpp index 12dfb0eb262f7..7d610b7983f50 100644 --- a/sycl/include/CL/sycl/intel/sub_group.hpp +++ b/sycl/include/CL/sycl/intel/sub_group.hpp @@ -132,6 +132,12 @@ void store(multi_ptr dst, const vec &x) { namespace intel { struct sub_group { + + using id_type = id<1>; + using range_type = range<1>; + using linear_id_type = size_t; + static constexpr int dimensions = 1; + /* --- common interface members --- */ id<1> get_local_id() const { @@ -153,10 +159,12 @@ struct sub_group { /* --- vote / ballot functions --- */ + __SYCL_DEPRECATED__("Use sycl::intel::any_of instead.") bool any(bool predicate) const { return __spirv_GroupAny(__spv::Scope::Subgroup, predicate); } + __SYCL_DEPRECATED__("Use sycl::intel::all_of instead.") bool all(bool predicate) const { return __spirv_GroupAll(__spv::Scope::Subgroup, predicate); } @@ -168,11 +176,13 @@ struct sub_group { /* --- collectives --- */ template + __SYCL_DEPRECATED__("Use sycl::intel::broadcast instead.") EnableIfIsScalarArithmetic broadcast(T x, id<1> local_id) const { return detail::spirv::GroupBroadcast<__spv::Scope::Subgroup>(x, local_id); } template + __SYCL_DEPRECATED__("Use sycl::intel::reduce instead.") EnableIfIsScalarArithmetic reduce(T x, BinaryOperation op) const { return detail::calc( @@ -180,11 +190,13 @@ struct sub_group { } template + __SYCL_DEPRECATED__("Use sycl::intel::reduce instead.") EnableIfIsScalarArithmetic reduce(T x, T init, BinaryOperation op) const { return op(init, reduce(x, op)); } template + __SYCL_DEPRECATED__("Use sycl::intel::exclusive_scan instead.") EnableIfIsScalarArithmetic exclusive_scan(T x, BinaryOperation op) const { return detail::calc( @@ -192,6 +204,7 @@ struct sub_group { } template + __SYCL_DEPRECATED__("Use sycl::intel::exclusive_scan instead.") EnableIfIsScalarArithmetic exclusive_scan(T x, T init, BinaryOperation op) const { if (get_local_id().get(0) == 0) { @@ -205,6 +218,7 @@ struct sub_group { } template + __SYCL_DEPRECATED__("Use sycl::intel::inclusive_scan instead.") EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op) const { return detail::calc( @@ -212,6 +226,7 @@ struct sub_group { } template + __SYCL_DEPRECATED__("Use sycl::intel::inclusive_scan instead.") EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op, T init) const { if (get_local_id().get(0) == 0) { diff --git a/sycl/include/CL/sycl/intel/sub_group_host.hpp b/sycl/include/CL/sycl/intel/sub_group_host.hpp index 2496d116e28d3..d805c42b8869c 100644 --- a/sycl/include/CL/sycl/intel/sub_group_host.hpp +++ b/sycl/include/CL/sycl/intel/sub_group_host.hpp @@ -20,6 +20,12 @@ namespace sycl { template class multi_ptr; namespace intel { struct sub_group { + + using id_type = id<1>; + using range_type = range<1>; + using linear_id_type = size_t; + static constexpr int dimensions = 1; + /* --- common interface members --- */ id<1> get_local_id() const { diff --git a/sycl/include/CL/sycl/ordered_queue.hpp b/sycl/include/CL/sycl/ordered_queue.hpp index 240d780645e8b..eda3b48e18f66 100644 --- a/sycl/include/CL/sycl/ordered_queue.hpp +++ b/sycl/include/CL/sycl/ordered_queue.hpp @@ -28,7 +28,7 @@ namespace detail { class queue_impl; } -class __SYCL_DEPRECATED__ ordered_queue { +class __SYCL_DEPRECATED__("Replaced by in_order queue property") ordered_queue { public: explicit ordered_queue(const property_list &propList = {}) @@ -257,8 +257,6 @@ class __SYCL_DEPRECATED__ ordered_queue { const detail::code_location &CodeLoc); }; -#undef __SYCL_DEPRECATED__ - } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/sub_group/broadcast.cpp b/sycl/test/sub_group/broadcast.cpp index 41e73b22fc8a3..b67308363b66c 100644 --- a/sycl/test/sub_group/broadcast.cpp +++ b/sycl/test/sub_group/broadcast.cpp @@ -15,9 +15,11 @@ #include "helper.hpp" #include -template class sycl_subgr; +template +class sycl_subgr; using namespace cl::sycl; -template void check(queue &Queue) { +template +void check(queue &Queue) { const int G = 240, L = 60; try { nd_range<1> NdRange(G, L); @@ -30,7 +32,7 @@ template void check(queue &Queue) { intel::sub_group SG = NdItem.get_sub_group(); /*Broadcast GID of element with SGLID == SGID */ syclacc[NdItem.get_global_id()] = - SG.broadcast(NdItem.get_global_id(0), SG.get_group_id()); + broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id()); if (NdItem.get_global_id(0) == 0) sgsizeacc[0] = SG.get_max_local_range()[0]; }); diff --git a/sycl/test/sub_group/reduce.cpp b/sycl/test/sub_group/reduce.cpp index 24d97cc276262..27e5baccd27ee 100644 --- a/sycl/test/sub_group/reduce.cpp +++ b/sycl/test/sub_group/reduce.cpp @@ -17,7 +17,8 @@ #include "helper.hpp" #include -template class sycl_subgr; +template +class sycl_subgr; using namespace cl::sycl; @@ -34,10 +35,10 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, intel::sub_group sg = NdItem.get_sub_group(); if (skip_init) { acc[NdItem.get_global_id(0)] = - sg.reduce(T(NdItem.get_global_id(0)), op); + reduce(sg, T(NdItem.get_global_id(0)), op); } else { acc[NdItem.get_global_id(0)] = - sg.reduce(T(NdItem.get_global_id(0)), init, op); + reduce(sg, T(NdItem.get_global_id(0)), init, op); } }); }); @@ -67,7 +68,8 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, } } -template void check(queue &Queue, size_t G = 240, size_t L = 60) { +template +void check(queue &Queue, size_t G = 240, size_t L = 60) { // limit data range for half to avoid rounding issues if (std::is_same::value) { G = 64; diff --git a/sycl/test/sub_group/scan.cpp b/sycl/test/sub_group/scan.cpp index bd3a653232127..3a61dfbcba4d9 100644 --- a/sycl/test/sub_group/scan.cpp +++ b/sycl/test/sub_group/scan.cpp @@ -18,7 +18,8 @@ #include #include -template class sycl_subgr; +template +class sycl_subgr; using namespace cl::sycl; @@ -36,14 +37,14 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, intel::sub_group sg = NdItem.get_sub_group(); if (skip_init) { exacc[NdItem.get_global_id(0)] = - sg.exclusive_scan(T(NdItem.get_global_id(0)), op); + exclusive_scan(sg, T(NdItem.get_global_id(0)), op); inacc[NdItem.get_global_id(0)] = - sg.inclusive_scan(T(NdItem.get_global_id(0)), op); + inclusive_scan(sg, T(NdItem.get_global_id(0)), op); } else { exacc[NdItem.get_global_id(0)] = - sg.exclusive_scan(T(NdItem.get_global_id(0)), init, op); + exclusive_scan(sg, T(NdItem.get_global_id(0)), init, op); inacc[NdItem.get_global_id(0)] = - sg.inclusive_scan(T(NdItem.get_global_id(0)), op, init); + inclusive_scan(sg, T(NdItem.get_global_id(0)), op, init); } }); }); @@ -75,7 +76,8 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, } } -template void check(queue &Queue, size_t G = 120, size_t L = 60) { +template +void check(queue &Queue, size_t G = 120, size_t L = 60) { // limit data range for half to avoid rounding issues if (std::is_same::value) { G = 64; diff --git a/sycl/test/sub_group/vote.cpp b/sycl/test/sub_group/vote.cpp index 16d0059d86f4d..6f0b4fc68a435 100644 --- a/sycl/test/sub_group/vote.cpp +++ b/sycl/test/sub_group/vote.cpp @@ -51,12 +51,12 @@ void check(queue Queue, const int G, const int L, const int D, const int R) { cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { intel::sub_group SG = NdItem.get_sub_group(); /* Set to 1 if any local ID in subgroup devided by D has remainder R */ - if (SG.any(SG.get_local_id().get(0) % D == R)) { + if (any_of(SG, SG.get_local_id().get(0) % D == R)) { sganyacc[NdItem.get_global_id()] = 1; } /* Set to 1 if remainder of division of subgroup local ID by D is less * than R for all work items in subgroup */ - if (SG.all(SG.get_local_id().get(0) % D < R)) { + if (all_of(SG, SG.get_local_id().get(0) % D < R)) { sgallacc[NdItem.get_global_id()] = 1; } });