diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 3455e0ab07219..27b3d3d6c137f 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -10,6 +10,8 @@ #include #include +#include +#include #include #include #include diff --git a/sycl/include/CL/sycl/backend.hpp b/sycl/include/CL/sycl/backend.hpp new file mode 100644 index 0000000000000..9edca021dca6b --- /dev/null +++ b/sycl/include/CL/sycl/backend.hpp @@ -0,0 +1,34 @@ +//==---------------- backend.hpp - SYCL PI backends ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +template +auto get_native(const SyclObjectT &Obj) -> + typename interop::type { + return Obj.template get_native(); +} + +// Native handle of an accessor should be accessed through interop_handler +template +auto get_native(const accessor &Obj) -> + typename interop>::type = + delete; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/backend/cuda.hpp b/sycl/include/CL/sycl/backend/cuda.hpp index aff3e96d54e4a..d215800e673b7 100644 --- a/sycl/include/CL/sycl/backend/cuda.hpp +++ b/sycl/include/CL/sycl/backend/cuda.hpp @@ -1,3 +1,4 @@ + //==---------------- cuda.hpp - SYCL CUDA backend --------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -6,29 +7,45 @@ // //===----------------------------------------------------------------------===// +#pragma once + +#include +#include +#include #include +#include +#include +#include + +typedef int CUdevice; +typedef struct CUctx_st *CUcontext; +typedef struct CUstream_st *CUstream; +typedef struct CUevent_st *CUevent; + +// As defined in the CUDA 10.1 header file. This requires CUDA version > 3.2 +#if defined(_WIN64) || defined(__LP64__) +typedef unsigned long long CUdeviceptr; +#else +typedef unsigned int CUdeviceptr; +#endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace backend { -namespace cuda { - -// CUDA backend specific options -// TODO: Use values that won't overlap with others - -// Mem Object info: Retrieve the raw CUDA pointer from a cl_mem -#define PI_CUDA_RAW_POINTER (0xFF01) -// Context creation: Use a primary CUDA context instead of a custom one by -// providing a property value of PI_TRUE for the following -// property ID. -#define PI_CONTEXT_PROPERTIES_CUDA_PRIMARY (0xFF02) - -// PI Command Queue using Default stream -#define PI_CUDA_USE_DEFAULT_STREAM (0xFF03) -// PI Command queue will sync with default stream -#define PI_CUDA_SYNC_WITH_DEFAULT (0xFF04) - -} // namespace cuda -} // namespace backend + +template <> struct interop { using type = CUdevice; }; + +template <> struct interop { using type = CUcontext; }; + +template <> struct interop { using type = CUstream; }; + +template <> struct interop { using type = CUevent; }; + +template +struct interop> { + using type = CUdeviceptr; +}; + } // namespace sycl } // namespace cl diff --git a/sycl/include/CL/sycl/backend/opencl.hpp b/sycl/include/CL/sycl/backend/opencl.hpp new file mode 100644 index 0000000000000..efd0f7df8a365 --- /dev/null +++ b/sycl/include/CL/sycl/backend/opencl.hpp @@ -0,0 +1,31 @@ + +//==---------------- opencl.hpp - SYCL OpenCL backend ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +template <> struct interop { + using type = cl_command_queue; +}; + +template +struct interop> { + using type = cl_mem; +}; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/backend_types.hpp b/sycl/include/CL/sycl/backend_types.hpp new file mode 100644 index 0000000000000..3db71b8b47485 --- /dev/null +++ b/sycl/include/CL/sycl/backend_types.hpp @@ -0,0 +1,19 @@ +//==-------------- backend_types.hpp - SYCL backend types ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +namespace cl { +namespace sycl { + +enum class backend { host, opencl, cuda }; + +template struct interop; + +} // namespace sycl +} // namespace cl \ No newline at end of file diff --git a/sycl/include/CL/sycl/context.hpp b/sycl/include/CL/sycl/context.hpp index 1dfe69f7336ad..3fd83ba913b93 100644 --- a/sycl/include/CL/sycl/context.hpp +++ b/sycl/include/CL/sycl/context.hpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #pragma once + +#include #include #include #include @@ -135,10 +137,21 @@ class __SYCL_EXPORT context { /// \return a vector of valid SYCL device instances. vector_class get_devices() const; + /// Gets the native handle of the SYCL context. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return reinterpret_cast::type>( + getNative()); + } + private: /// Constructs a SYCL context object from a valid context_impl instance. context(shared_ptr_class Impl); + pi_native_handle getNative() const; + shared_ptr_class impl; template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 6795ff3eeca7b..8dbe84d10e209 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -29,6 +30,12 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +// Forward declaration +class queue; +namespace detail { +class queue_impl; +} // namespace detail + // Interoperability handler // class interop_handler { @@ -37,26 +44,51 @@ class interop_handler { access::target AccTarget, access::placeholder isPlaceholder> friend class accessor; public: + using QueueImplPtr = std::shared_ptr; using ReqToMem = std::pair; - interop_handler(std::vector MemObjs, cl_command_queue PiQueue) : - MQueue(PiQueue), MMemObjs(MemObjs) {} + interop_handler(std::vector MemObjs, QueueImplPtr Queue) + : MQueue(std::move(Queue)), MMemObjs(std::move(MemObjs)) {} - cl_command_queue get_queue() const noexcept { return MQueue; }; + template + auto get_queue() const -> typename interop::type { + return reinterpret_cast::type>( + GetNativeQueue()); + } - template - cl_mem get_mem(accessor - Acc) const { + auto get_mem(accessor + Acc) const -> + typename interop>::type { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; - return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); + return getMemImpl( + detail::getSyclObjImpl(*AccBase).get()); } + private: - cl_command_queue MQueue; + QueueImplPtr MQueue; std::vector MMemObjs; - __SYCL_EXPORT cl_mem getMemImpl(detail::Requirement *Req) const; + + template + __SYCL_EXPORT auto + getMemImpl(detail::Requirement *Req) const -> typename interop< + BackendName, + accessor>::type { + return (typename interop>::type)GetNativeMem(Req); + } + + __SYCL_EXPORT pi_native_handle GetNativeMem(detail::Requirement *Req) const; + __SYCL_EXPORT pi_native_handle GetNativeQueue() const; }; namespace detail { diff --git a/sycl/include/CL/sycl/detail/cuda_definitions.hpp b/sycl/include/CL/sycl/detail/cuda_definitions.hpp new file mode 100644 index 0000000000000..f3b1e030a332e --- /dev/null +++ b/sycl/include/CL/sycl/detail/cuda_definitions.hpp @@ -0,0 +1,24 @@ +//==------------ cuda_definitions.hpp - SYCL CUDA backend ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// CUDA backend specific options +// TODO: Use values that won't overlap with others + +// Mem Object info: Retrieve the raw CUDA pointer from a cl_mem +#define PI_CUDA_RAW_POINTER (0xFF01) +// Context creation: Use a primary CUDA context instead of a custom one by +// providing a property value of PI_TRUE for the following +// property ID. +#define PI_CONTEXT_PROPERTIES_CUDA_PRIMARY (0xFF02) + +// PI Command Queue using Default stream +#define PI_CUDA_USE_DEFAULT_STREAM (0xFF03) +// PI Command queue will sync with default stream +#define PI_CUDA_SYNC_WITH_DEFAULT (0xFF04) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index c16f1d4331695..a23364b3f7554 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -18,7 +18,6 @@ _PI_API(piPlatformsGet) _PI_API(piPlatformGetInfo) // Device -_PI_API(piextDeviceConvert) _PI_API(piDevicesGet) _PI_API(piDeviceGetInfo) _PI_API(piDevicePartition) @@ -26,18 +25,24 @@ _PI_API(piDeviceRetain) _PI_API(piDeviceRelease) _PI_API(piextDeviceSelectBinary) _PI_API(piextGetDeviceFunctionPointer) +_PI_API(piextDeviceGetNativeHandle) +_PI_API(piextDeviceCreateWithNativeHandle) // Context _PI_API(piContextCreate) _PI_API(piContextGetInfo) _PI_API(piContextRetain) _PI_API(piContextRelease) _PI_API(piextContextSetExtendedDeleter) +_PI_API(piextContextGetNativeHandle) +_PI_API(piextContextCreateWithNativeHandle) // Queue _PI_API(piQueueCreate) _PI_API(piQueueGetInfo) _PI_API(piQueueFinish) _PI_API(piQueueRetain) _PI_API(piQueueRelease) +_PI_API(piextQueueGetNativeHandle) +_PI_API(piextQueueCreateWithNativeHandle) // Memory _PI_API(piMemBufferCreate) _PI_API(piMemImageCreate) @@ -46,8 +51,9 @@ _PI_API(piMemImageGetInfo) _PI_API(piMemRetain) _PI_API(piMemRelease) _PI_API(piMemBufferPartition) +_PI_API(piextMemGetNativeHandle) +_PI_API(piextMemCreateWithNativeHandle) // Program -_PI_API(piextProgramConvert) _PI_API(piProgramCreate) _PI_API(piclProgramCreateWithSource) _PI_API(piclProgramCreateWithBinary) @@ -59,6 +65,8 @@ _PI_API(piProgramGetBuildInfo) _PI_API(piProgramRetain) _PI_API(piProgramRelease) _PI_API(piextProgramSetSpecializationConstant) +_PI_API(piextProgramGetNativeHandle) +_PI_API(piextProgramCreateWithNativeHandle) // Kernel _PI_API(piKernelCreate) _PI_API(piKernelSetArg) @@ -78,6 +86,8 @@ _PI_API(piEventSetCallback) _PI_API(piEventSetStatus) _PI_API(piEventRetain) _PI_API(piEventRelease) +_PI_API(piextEventGetNativeHandle) +_PI_API(piextEventCreateWithNativeHandle) // Sampler _PI_API(piSamplerCreate) _PI_API(piSamplerGetInfo) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index d51db55f85cb0..49e569077f9fd 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -59,6 +59,7 @@ using pi_uint32 = uint32_t; using pi_uint64 = uint64_t; using pi_bool = pi_uint32; using pi_bitfield = pi_uint64; +using pi_native_handle = uintptr_t; // // NOTE: prefer to map 1:1 to OpenCL so that no translation is needed @@ -762,19 +763,6 @@ pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); -// -// Device -// -/// -/// Create PI device from the given raw device handle (if the "device" -/// points to null), or, vice versa, extract the raw device handle into -/// the "handle" (if it was pointing to a null) from the given PI device. -/// NOTE: The instance of the PI device created is retained. -/// -pi_result piextDeviceConvert( - pi_device *device, ///< [in,out] the pointer to PI device - void **handle); ///< [in,out] the pointer to the raw device handle - pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices); @@ -792,6 +780,21 @@ pi_result piDevicePartition(pi_device device, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices); +/// Gets the native handle of a PI device object. +/// +/// \param device is the PI device to get the native handle of. +/// \param nativeHandle is the native handle of device. +pi_result piextDeviceGetNativeHandle(pi_device device, + pi_native_handle *nativeHandle); + +/// Creates PI device object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI device from. +/// \param device is the PI device created from the native handle. +pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_device *device); + /// Selects the most appropriate device binary based on runtime information /// and the IR characteristics. /// @@ -835,6 +838,21 @@ pi_result piextContextSetExtendedDeleter(pi_context context, pi_context_extended_deleter func, void *user_data); +/// Gets the native handle of a PI context object. +/// +/// \param context is the PI context to get the native handle of. +/// \param nativeHandle is the native handle of context. +pi_result piextContextGetNativeHandle(pi_context context, + pi_native_handle *nativeHandle); + +/// Creates PI context object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI context from. +/// \param context is the PI context created from the native handle. +pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context *context); + // // Queue // @@ -851,6 +869,21 @@ pi_result piQueueRelease(pi_queue command_queue); pi_result piQueueFinish(pi_queue command_queue); +/// Gets the native handle of a PI queue object. +/// +/// \param queue is the PI queue to get the native handle of. +/// \param nativeHandle is the native handle of queue. +pi_result piextQueueGetNativeHandle(pi_queue queue, + pi_native_handle *nativeHandle); + +/// Creates PI queue object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI queue from. +/// \param queue is the PI queue created from the native handle. +pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_queue *queue); + // // Memory // @@ -878,19 +911,24 @@ pi_result piMemRelease(pi_mem mem); pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem); + +/// Gets the native handle of a PI mem object. +/// +/// \param mem is the PI mem to get the native handle of. +/// \param nativeHandle is the native handle of mem. +pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle); + +/// Creates PI mem object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI mem from. +/// \param mem is the PI mem created from the native handle. +pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_mem *mem); + // // Program // -/// -/// Create PI program from the given raw program handle (if the "program" -/// points to null), or, vice versa, extract the raw program handle into -/// the "handle" (if it was pointing to a null) from the given PI program. -/// NOTE: The instance of the PI program created is retained. -/// -pi_result piextProgramConvert( - pi_context context, ///< [in] the PI context of the program - pi_program *program, ///< [in,out] the pointer to PI program - void **handle); ///< [in,out] the pointer to the raw program handle pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program); @@ -950,6 +988,21 @@ pi_result piextProgramSetSpecializationConstant(pi_program prog, size_t spec_size, const void *spec_value); +/// Gets the native handle of a PI program object. +/// +/// \param program is the PI program to get the native handle of. +/// \param nativeHandle is the native handle of program. +pi_result piextProgramGetNativeHandle(pi_program program, + pi_native_handle *nativeHandle); + +/// Creates PI program object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI program from. +/// \param program is the PI program created from the native handle. +pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_program *program); + // // Kernel // @@ -1041,6 +1094,21 @@ pi_result piEventRetain(pi_event event); pi_result piEventRelease(pi_event event); +/// Gets the native handle of a PI event object. +/// +/// \param event is the PI event to get the native handle of. +/// \param nativeHandle is the native handle of event. +pi_result piextEventGetNativeHandle(pi_event event, + pi_native_handle *nativeHandle); + +/// Creates PI event object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI event from. +/// \param event is the PI event created from the native handle. +pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_event *event); + // // Sampler // diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index a374288d703b6..7be744c635f59 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -311,12 +311,12 @@ template inline To cast(From value) { // These conversions should use PI interop API. template <> inline pi::PiProgram cast(cl_program interop) { - RT::assertion(false, "pi::cast -> use piextProgramConvert"); + RT::assertion(false, "pi::cast -> use piextProgramFromNative"); return {}; } template <> inline pi::PiDevice cast(cl_device_id interop) { - RT::assertion(false, "pi::cast -> use piextDeviceConvert"); + RT::assertion(false, "pi::cast -> use piextDeviceFromNative"); return {}; } } // namespace pi diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp index c1ea233eaa68c..f198e28f8c5e8 100644 --- a/sycl/include/CL/sycl/device.hpp +++ b/sycl/include/CL/sycl/device.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -165,10 +166,21 @@ class __SYCL_EXPORT device { static vector_class get_devices(info::device_type deviceType = info::device_type::all); + /// Gets the native handle of the SYCL device. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return static_cast::type>( + getNative()); + } + private: shared_ptr_class impl; device(shared_ptr_class impl) : impl(impl) {} + pi_native_handle getNative() const; + template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index 001f5ee209c15..5b97043897690 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -113,9 +114,19 @@ class __SYCL_EXPORT event { typename info::param_traits::return_type get_profiling_info() const; + /// Gets the native handle of the SYCL event. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return static_cast::type>(getNative()); + } + private: event(shared_ptr_class EventImpl); + pi_native_handle getNative() const; + shared_ptr_class impl; template diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index bea3df4857094..685bcdb577d6e 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -638,7 +639,18 @@ class __SYCL_EXPORT queue { /// Equivalent to has_property() bool is_in_order() const; + /// Gets the native handle of the SYCL queue. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return reinterpret_cast::type>( + getNative()); + } + private: + pi_native_handle getNative() const; + shared_ptr_class impl; template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 42270c2f32cbc..bbe3bc4442630 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -11,7 +11,7 @@ /// /// \ingroup sycl_pi_cuda -#include +#include #include #include @@ -705,12 +705,6 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform, return {}; } -/// \TODO Not implemented -pi_result cuda_piextDeviceConvert(pi_device *device, void **handle) { - cl::sycl::detail::pi::die("cuda_piextDeviceConvert not implemented"); - return {}; -} - /// \param devices List of devices available on the system /// \param num_devices Number of elements in the list of devices /// Requesting a non-GPU device triggers an error, all PI CUDA devices @@ -1358,6 +1352,33 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return {}; } +/// Gets the native CUDA handle of a PI device object +/// +/// \param[in] device The PI device to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI device object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextDeviceGetNativeHandle(pi_device device, + pi_native_handle *nativeHandle) { + *nativeHandle = static_cast(device->get()); + return PI_SUCCESS; +} + +/// Created a PI device object from a CUDA device handle. +/// TODO: Implement this. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param[in] nativeHandle The native handle to create PI device object from. +/// \param[out] device Set to the PI device object created from native handle. +/// +/// \return TBD +pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_device *device) { + cl::sycl::detail::pi::die( + "Creation of PI device from native handle not implemented"); + return {}; +} + /* Context APIs */ /// Create a PI CUDA context. @@ -1493,6 +1514,33 @@ pi_result cuda_piContextRelease(pi_context ctxt) { } } +/// Gets the native CUDA handle of a PI context object +/// +/// \param[in] context The PI context to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI context object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextContextGetNativeHandle(pi_context context, + pi_native_handle *nativeHandle) { + *nativeHandle = reinterpret_cast(context->get()); + return PI_SUCCESS; +} + +/// Created a PI context object from a CUDA context handle. +/// TODO: Implement this. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param[in] nativeHandle The native handle to create PI context object from. +/// \param[out] context Set to the PI context object created from native handle. +/// +/// \return TBD +pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context *context) { + cl::sycl::detail::pi::die( + "Creation of PI context from native handle not implemented"); + return {}; +} + /// Creates a PI Memory object using a CUDA memory allocation. /// Can trigger a manual copy depending on the mode. /// \TODO Implement USE_HOST_PTR using cuHostRegister @@ -1675,6 +1723,33 @@ pi_result cuda_piMemGetInfo(pi_mem memObj, cl_mem_info queriedInfo, cl::sycl::detail::pi::die("cuda_piMemGetInfo not implemented"); } +/// Gets the native CUDA handle of a PI mem object +/// +/// \param[in] mem The PI mem to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI mem object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextMemGetNativeHandle(pi_mem mem, + pi_native_handle *nativeHandle) { + *nativeHandle = static_cast(mem->get()); + return PI_SUCCESS; +} + +/// Created a PI mem object from a CUDA mem handle. +/// TODO: Implement this. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param[in] nativeHandle The native handle to create PI mem object from. +/// \param[out] mem Set to the PI mem object created from native handle. +/// +/// \return TBD +pi_result cuda_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_mem *mem) { + cl::sycl::detail::pi::die( + "Creation of PI mem from native handle not implemented"); + return {}; +} + /// Creates a `pi_queue` object on the CUDA backend. /// Valid properties /// * PI_CUDA_USE_DEFAULT_STREAM -> CU_STREAM_DEFAULT @@ -1810,6 +1885,33 @@ pi_result cuda_piQueueFinish(pi_queue command_queue) { return result; } +/// Gets the native CUDA handle of a PI queue object +/// +/// \param[in] queue The PI queue to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI queue object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextQueueGetNativeHandle(pi_queue queue, + pi_native_handle *nativeHandle) { + *nativeHandle = reinterpret_cast(queue->get()); + return PI_SUCCESS; +} + +/// Created a PI queue object from a CUDA queue handle. +/// TODO: Implement this. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param[in] nativeHandle The native handle to create PI queue object from. +/// \param[out] queue Set to the PI queue object created from native handle. +/// +/// \return TBD +pi_result cuda_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_queue *queue) { + cl::sycl::detail::pi::die( + "Creation of PI queue from native handle not implemented"); + return {}; +} + pi_result cuda_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, @@ -2188,16 +2290,6 @@ pi_result cuda_piProgramBuild(pi_program program, pi_uint32 num_devices, return retError; } -/// \TODO Not implemented -pi_result cuda_piextProgramConvert( - pi_context context, ///< [in] the PI context of the program - pi_program *program, ///< [in,out] the pointer to PI program - void **handle) ///< [in,out] the pointer to the raw program handle -{ - cl::sycl::detail::pi::die("cuda_piextProgramConvert not implemented"); - return {}; -} - /// \TODO Not implemented pi_result cuda_piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program) { @@ -2407,6 +2499,33 @@ pi_result cuda_piProgramRelease(pi_program program) { return PI_SUCCESS; } +/// Gets the native CUDA handle of a PI program object +/// +/// \param[in] program The PI program to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI program object. +/// +/// \return TBD +pi_result cuda_piextProgramGetNativeHandle(pi_program program, + pi_native_handle *nativeHandle) { + *nativeHandle = reinterpret_cast(program->get()); + return PI_SUCCESS; +} + +/// Created a PI program object from a CUDA program handle. +/// TODO: Implement this. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param[in] nativeHandle The native handle to create PI program object from. +/// \param[out] program Set to the PI program object created from native handle. +/// +/// \return TBD +pi_result cuda_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_program *program) { + cl::sycl::detail::pi::die( + "Creation of PI program from native handle not implemented"); + return {}; +} + pi_result cuda_piKernelGetInfo( pi_kernel kernel, pi_kernel_info param_name, @@ -2762,6 +2881,36 @@ pi_result cuda_piEnqueueEventsWait(pi_queue command_queue, } } +/// Gets the native CUDA handle of a PI event object +/// +/// \param[in] event The PI event to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI event object. +/// +/// \return PI_SUCCESS on success. PI_INVALID_EVENT if given a user event. +pi_result cuda_piextEventGetNativeHandle(pi_event event, + pi_native_handle *nativeHandle) { + if (event->is_user_event()) { + return PI_INVALID_EVENT; + } + *nativeHandle = reinterpret_cast(event->get()); + return PI_SUCCESS; +} + +/// Created a PI event object from a CUDA event handle. +/// TODO: Implement this. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param[in] nativeHandle The native handle to create PI event object from. +/// \param[out] event Set to the PI event object created from native handle. +/// +/// \return TBD +pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_event *event) { + cl::sycl::detail::pi::die( + "Creation of PI event from native handle not implemented"); + return {}; +} + /// \TODO Not implemented in CUDA, need untie from OpenCL pi_result cuda_piSamplerCreate(pi_context context, const cl_sampler_properties *sampler_properties, @@ -3580,7 +3729,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPlatformsGet, cuda_piPlatformsGet) _PI_CL(piPlatformGetInfo, cuda_piPlatformGetInfo) // Device - _PI_CL(piextDeviceConvert, cuda_piextDeviceConvert) _PI_CL(piDevicesGet, cuda_piDevicesGet) _PI_CL(piDeviceGetInfo, cuda_piDeviceGetInfo) _PI_CL(piDevicePartition, cuda_piDevicePartition) @@ -3588,18 +3736,27 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piDeviceRelease, cuda_piDeviceRelease) _PI_CL(piextDeviceSelectBinary, cuda_piextDeviceSelectBinary) _PI_CL(piextGetDeviceFunctionPointer, cuda_piextGetDeviceFunctionPointer) + _PI_CL(piextDeviceGetNativeHandle, cuda_piextDeviceGetNativeHandle) + _PI_CL(piextDeviceCreateWithNativeHandle, + cuda_piextDeviceCreateWithNativeHandle) // Context _PI_CL(piextContextSetExtendedDeleter, cuda_piextContextSetExtendedDeleter) _PI_CL(piContextCreate, cuda_piContextCreate) _PI_CL(piContextGetInfo, cuda_piContextGetInfo) _PI_CL(piContextRetain, cuda_piContextRetain) _PI_CL(piContextRelease, cuda_piContextRelease) + _PI_CL(piextContextGetNativeHandle, cuda_piextContextGetNativeHandle) + _PI_CL(piextContextCreateWithNativeHandle, + cuda_piextContextCreateWithNativeHandle) // Queue _PI_CL(piQueueCreate, cuda_piQueueCreate) _PI_CL(piQueueGetInfo, cuda_piQueueGetInfo) _PI_CL(piQueueFinish, cuda_piQueueFinish) _PI_CL(piQueueRetain, cuda_piQueueRetain) _PI_CL(piQueueRelease, cuda_piQueueRelease) + _PI_CL(piextQueueGetNativeHandle, cuda_piextQueueGetNativeHandle) + _PI_CL(piextQueueCreateWithNativeHandle, + cuda_piextQueueCreateWithNativeHandle) // Memory _PI_CL(piMemBufferCreate, cuda_piMemBufferCreate) _PI_CL(piMemImageCreate, cuda_piMemImageCreate) @@ -3608,8 +3765,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piMemRetain, cuda_piMemRetain) _PI_CL(piMemRelease, cuda_piMemRelease) _PI_CL(piMemBufferPartition, cuda_piMemBufferPartition) + _PI_CL(piextMemGetNativeHandle, cuda_piextMemGetNativeHandle) + _PI_CL(piextMemCreateWithNativeHandle, cuda_piextMemCreateWithNativeHandle) // Program - _PI_CL(piextProgramConvert, cuda_piextProgramConvert) _PI_CL(piProgramCreate, cuda_piProgramCreate) _PI_CL(piclProgramCreateWithSource, cuda_piclProgramCreateWithSource) _PI_CL(piclProgramCreateWithBinary, cuda_piclProgramCreateWithBinary) @@ -3620,6 +3778,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piProgramGetBuildInfo, cuda_piProgramGetBuildInfo) _PI_CL(piProgramRetain, cuda_piProgramRetain) _PI_CL(piProgramRelease, cuda_piProgramRelease) + _PI_CL(piextMemGetNativeHandle, cuda_piextMemGetNativeHandle) + _PI_CL(piextMemCreateWithNativeHandle, cuda_piextMemCreateWithNativeHandle) // Kernel _PI_CL(piKernelCreate, cuda_piKernelCreate) _PI_CL(piKernelSetArg, cuda_piKernelSetArg) @@ -3639,6 +3799,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piEventSetStatus, cuda_piEventSetStatus) _PI_CL(piEventRetain, cuda_piEventRetain) _PI_CL(piEventRelease, cuda_piEventRelease) + _PI_CL(piextEventGetNativeHandle, cuda_piextEventGetNativeHandle) + _PI_CL(piextEventCreateWithNativeHandle, + cuda_piextEventCreateWithNativeHandle) // Sampler _PI_CL(piSamplerCreate, cuda_piSamplerCreate) _PI_CL(piSamplerGetInfo, cuda_piSamplerGetInfo) diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index d6989a87cf66f..382ef5ed85c3e 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -289,7 +289,7 @@ struct _pi_queue { cuda_piDeviceRelease(device_); } - native_type get() const { return stream_; }; + native_type get() const noexcept { return stream_; }; _pi_context *get_context() const { return context_; }; @@ -517,7 +517,7 @@ struct _pi_program { pi_context get_context() const { return context_; }; - native_type get() const { return module_; }; + native_type get() const noexcept { return module_; }; pi_uint32 increment_reference_count() noexcept { return ++refCount_; } diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 5f3f832c35e38..733322fe0f577 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -179,24 +179,6 @@ pi_result OCL(piPlatformsGet)(pi_uint32 num_entries, pi_platform *platforms, return static_cast(result); } -pi_result OCL(piextDeviceConvert)(pi_device *device, void **handle) { - // The PI device is the same as OpenCL device handle. - assert(device); - assert(handle); - - if (*device == nullptr) { - // unitialized *device. - assert(*handle); - *device = cast(*handle); - } else { - assert(*handle == nullptr); - *handle = *device; - } - - cl_int result = clRetainDevice(cast(*handle)); - return cast(result); -} - // Example of a PI interface that does not map exactly to an OpenCL one. pi_result OCL(piDevicesGet)(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, @@ -287,6 +269,13 @@ pi_result OCL(piextDeviceSelectBinary)(pi_device device, return PI_INVALID_BINARY; } +pi_result OCL(piextDeviceCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_device *piDevice) { + assert(piDevice != nullptr); + *piDevice = reinterpret_cast(nativeHandle); + return PI_SUCCESS; +} + pi_result OCL(piQueueCreate)(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue) { assert(queue && "piQueueCreate failed, queue argument is null"); @@ -327,25 +316,11 @@ pi_result OCL(piQueueCreate)(pi_context context, pi_device device, return cast(ret_err); } -pi_result OCL(piextProgramConvert)( - pi_context context, ///< [in] the PI context of the program - pi_program *program, ///< [in,out] the pointer to PI program - void **handle) ///< [in,out] the pointer to the raw program handle -{ - // The PI program is the same as OpenCL program handle. - assert(program); - assert(handle); - - if (*program == nullptr) { - // uninitialized *program. - assert(*handle); - *program = cast(*handle); - } else { - assert(*handle == nullptr); - *handle = *program; - } - cl_int result = clRetainProgram(cast(*handle)); - return cast(result); +pi_result OCL(piextQueueCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_queue *piQueue) { + assert(piQueue != nullptr); + *piQueue = reinterpret_cast(nativeHandle); + return PI_SUCCESS; } pi_result OCL(piProgramCreate)(pi_context context, const void *il, @@ -426,6 +401,13 @@ pi_result OCL(piProgramCreate)(pi_context context, const void *il, return err; } +pi_result OCL(piextProgramCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_program *piProgram) { + assert(piProgram != nullptr); + *piProgram = reinterpret_cast(nativeHandle); + return PI_SUCCESS; +} + pi_result OCL(piSamplerCreate)(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler) { @@ -517,6 +499,13 @@ pi_result OCL(piContextCreate)(const pi_context_properties *properties, return ret; } +pi_result OCL(piextContextCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_context *piContext) { + assert(piContext != nullptr); + *piContext = reinterpret_cast(nativeHandle); + return PI_SUCCESS; +} + pi_result OCL(piMemBufferCreate)(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem) { pi_result ret_err = PI_INVALID_OPERATION; @@ -553,6 +542,13 @@ pi_result OCL(piMemBufferPartition)(pi_mem buffer, pi_mem_flags flags, return ret_err; } +pi_result OCL(piextMemCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_mem *piMem) { + assert(piMem != nullptr); + *piMem = reinterpret_cast(nativeHandle); + return PI_SUCCESS; +} + pi_result OCL(piclProgramCreateWithSource)(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, @@ -614,6 +610,13 @@ pi_result OCL(piEventCreate)(pi_context context, pi_event *ret_event) { return ret_err; } +pi_result OCL(piextEventCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_event *piEvent) { + assert(piEvent != nullptr); + *piEvent = reinterpret_cast(nativeHandle); + return PI_SUCCESS; +} + pi_result OCL(piEnqueueMemBufferMap)( pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, cl_map_flags map_flags, // TODO: untie from OpenCL @@ -1043,6 +1046,19 @@ static pi_result OCL(piextProgramSetSpecializationConstantImpl)( return cast(Res); } +/// Common API for getting the native handle of a PI object +/// +/// \param piObj is the pi object to get the native handle of +/// \param nativeHandle is a pointer to be set to the native handle +/// +/// PI_SUCCESS +pi_result OCL(piextGetNativeHandle)(void *piObj, + pi_native_handle *nativeHandle) { + assert(nativeHandle != nullptr); + *nativeHandle = reinterpret_cast(piObj); + return PI_SUCCESS; +} + pi_result piPluginInit(pi_plugin *PluginInit) { int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion); if (CompareVersions < 0) { @@ -1061,7 +1077,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPlatformsGet, OCL(piPlatformsGet)) _PI_CL(piPlatformGetInfo, clGetPlatformInfo) // Device - _PI_CL(piextDeviceConvert, OCL(piextDeviceConvert)) _PI_CL(piDevicesGet, OCL(piDevicesGet)) _PI_CL(piDeviceGetInfo, clGetDeviceInfo) _PI_CL(piDevicePartition, clCreateSubDevices) @@ -1069,17 +1084,26 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piDeviceRelease, clReleaseDevice) _PI_CL(piextDeviceSelectBinary, OCL(piextDeviceSelectBinary)) _PI_CL(piextGetDeviceFunctionPointer, OCL(piextGetDeviceFunctionPointer)) + _PI_CL(piextDeviceGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextDeviceCreateWithNativeHandle, + OCL(piextDeviceCreateWithNativeHandle)) // Context _PI_CL(piContextCreate, OCL(piContextCreate)) _PI_CL(piContextGetInfo, clGetContextInfo) _PI_CL(piContextRetain, clRetainContext) _PI_CL(piContextRelease, clReleaseContext) + _PI_CL(piextContextGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextContextCreateWithNativeHandle, + OCL(piextContextCreateWithNativeHandle)) // Queue _PI_CL(piQueueCreate, OCL(piQueueCreate)) _PI_CL(piQueueGetInfo, clGetCommandQueueInfo) _PI_CL(piQueueFinish, clFinish) _PI_CL(piQueueRetain, clRetainCommandQueue) _PI_CL(piQueueRelease, clReleaseCommandQueue) + _PI_CL(piextQueueGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextQueueCreateWithNativeHandle, + OCL(piextQueueCreateWithNativeHandle)) // Memory _PI_CL(piMemBufferCreate, OCL(piMemBufferCreate)) _PI_CL(piMemImageCreate, OCL(piMemImageCreate)) @@ -1088,8 +1112,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piMemRetain, clRetainMemObject) _PI_CL(piMemRelease, clReleaseMemObject) _PI_CL(piMemBufferPartition, OCL(piMemBufferPartition)) + _PI_CL(piextMemGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextMemCreateWithNativeHandle, OCL(piextMemCreateWithNativeHandle)) // Program - _PI_CL(piextProgramConvert, OCL(piextProgramConvert)) _PI_CL(piProgramCreate, OCL(piProgramCreate)) _PI_CL(piclProgramCreateWithSource, OCL(piclProgramCreateWithSource)) _PI_CL(piclProgramCreateWithBinary, OCL(piclProgramCreateWithBinary)) @@ -1102,7 +1127,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piProgramRelease, clReleaseProgram) _PI_CL(piextProgramSetSpecializationConstant, OCL(piextProgramSetSpecializationConstantImpl)) - + _PI_CL(piextProgramGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextProgramCreateWithNativeHandle, + OCL(piextProgramCreateWithNativeHandle)) // Kernel _PI_CL(piKernelCreate, OCL(piKernelCreate)) _PI_CL(piKernelSetArg, clSetKernelArg) @@ -1122,6 +1149,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piEventSetStatus, clSetUserEventStatus) _PI_CL(piEventRetain, clRetainEvent) _PI_CL(piEventRelease, clReleaseEvent) + _PI_CL(piextEventGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextEventCreateWithNativeHandle, + OCL(piextEventCreateWithNativeHandle)) // Sampler _PI_CL(piSamplerCreate, OCL(piSamplerCreate)) _PI_CL(piSamplerGetInfo, clGetSamplerInfo) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 49b49914eabf6..ca58bdac310df 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -96,5 +96,7 @@ vector_class context::get_devices() const { context::context(shared_ptr_class Impl) : impl(Impl) {} +pi_native_handle context::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/cg.cpp b/sycl/source/detail/cg.cpp index d4c5a1563cdba..2441d1cb7e919 100644 --- a/sycl/source/detail/cg.cpp +++ b/sycl/source/detail/cg.cpp @@ -8,11 +8,11 @@ #include "CL/sycl/detail/cg.hpp" #include +#include #include #include #include - #include #include #include @@ -21,17 +21,24 @@ namespace cl { namespace sycl { -cl_mem interop_handler::getMemImpl(detail::Requirement* Req) const { - auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), - [=](ReqToMem Elem) { - return (Elem.first == Req); - }); +pi_native_handle interop_handler::GetNativeQueue() const { + return MQueue->getNative(); +} + +pi_native_handle interop_handler::GetNativeMem(detail::Requirement *Req) const { + auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), + [=](ReqToMem Elem) { return (Elem.first == Req); }); - if (Iter == std::end(MMemObjs)) { - throw("Invalid memory object used inside interop"); - } - return detail::pi::cast(Iter->second); + if (Iter == std::end(MMemObjs)) { + throw("Invalid memory object used inside interop"); } + auto Plugin = MQueue->getPlugin(); + pi_native_handle Handle; + Plugin.call(Iter->second, + &Handle); + return Handle; +} + } // sycl } // cl diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index d1bee11d0d8ec..669d047aae730 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -6,9 +6,9 @@ // // ===--------------------------------------------------------------------=== // -#include #include #include +#include #include #include #include @@ -151,6 +151,13 @@ context_impl::hasDevice(shared_ptr_class Device) const { return false; } +pi_native_handle context_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(getHandleRef(), &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 266c3879366bc..4df0fde2cf0ea 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -143,6 +143,11 @@ class context_impl { /// Returns true if and only if context contains the given device. bool hasDevice(shared_ptr_class Device) const; + /// Gets the native handle of the SYCL context. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: async_handler MAsyncHandler; vector_class MDevices; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index b7b4a76c5a395..f060986438c63 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -19,27 +19,29 @@ device_impl::device_impl() : MIsHostDevice(true), MPlatform(std::make_shared(platform_impl())) {} -device_impl::device_impl(device_interop_handle_t InteropDeviceHandle, +device_impl::device_impl(pi_native_handle InteropDeviceHandle, const plugin &Plugin) : device_impl(InteropDeviceHandle, nullptr, nullptr, Plugin) {} device_impl::device_impl(RT::PiDevice Device, PlatformImplPtr Platform) - : device_impl(nullptr, Device, Platform, Platform->getPlugin()) {} + : device_impl(reinterpret_cast(nullptr), Device, Platform, + Platform->getPlugin()) {} device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) - : device_impl(nullptr, Device, nullptr, Plugin) {} + : device_impl(reinterpret_cast(nullptr), Device, nullptr, + Plugin) {} -device_impl::device_impl(device_interop_handle_t InteropDeviceHandle, +device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) : MDevice(Device), MIsHostDevice(false) { bool InteroperabilityConstructor = false; if (Device == nullptr) { - assert(InteropDeviceHandle != nullptr); + assert(InteropDeviceHandle); // Get PI device from the raw device handle. - Plugin.call(&MDevice, - (void **)&InteropDeviceHandle); + Plugin.call( + InteropDeviceHandle, &MDevice); InteroperabilityConstructor = true; } @@ -56,7 +58,7 @@ device_impl::device_impl(device_interop_handle_t InteropDeviceHandle, if (!MIsRootDevice && !InteroperabilityConstructor) { // TODO catch an exception and put it to list of asynchronous exceptions // Interoperability Constructor already calls DeviceRetain in - // piextDeviceConvert. + // piextDeviceFromNative. Plugin.call(MDevice); } @@ -97,10 +99,7 @@ cl_device_id device_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions Plugin.call(MDevice); } - void *handle = nullptr; - Plugin.call( - const_cast(&MDevice), &handle); - return pi::cast(handle); + return pi::cast(getNative()); } platform device_impl::get_platform() const { @@ -214,6 +213,13 @@ vector_class device_impl::create_sub_devices( return create_sub_devices(Properties, SubDevicesCount); } +pi_native_handle device_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(getHandleRef(), &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 71572216478d5..56b47cefffccb 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -27,11 +27,6 @@ namespace detail { class platform_impl; using PlatformImplPtr = std::shared_ptr; -// TODO: SYCL BE generalization will change this to something better. -// For now this saves us from unwanted implicit casts. -struct _device_interop_handle_t; -using device_interop_handle_t = _device_interop_handle_t *; - // TODO: Make code thread-safe class device_impl { public: @@ -39,7 +34,7 @@ class device_impl { device_impl(); /// Constructs a SYCL device instance using the provided raw device handle. - explicit device_impl(device_interop_handle_t, const plugin &Plugin); + explicit device_impl(pi_native_handle, const plugin &Plugin); /// Constructs a SYCL device instance using the provided /// PI device instance. @@ -203,10 +198,14 @@ class device_impl { bool is_affinity_supported(info::partition_affinity_domain AffinityDomain) const; + /// Gets the native handle of the SYCL device. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: - explicit device_impl(device_interop_handle_t InteropDevice, - RT::PiDevice Device, PlatformImplPtr Platform, - const plugin &Plugin); + explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, + PlatformImplPtr Platform, const plugin &Plugin); RT::PiDevice MDevice = 0; RT::PiDeviceType MType; bool MIsRootDevice = false; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index b4678181df937..f3263720a2bb4 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -258,6 +258,13 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); } void HostProfilingInfo::end() { EndTime = getTimestamp(); } +pi_native_handle event_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(getHandleRef(), &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index d21c84c6b1a96..94600f5eb6f9f 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -145,6 +145,11 @@ class event_impl { /// @return a pointer to HostProfilingInfo instance. HostProfilingInfo *getHostProfilingInfo() { return MHostProfilingInfo.get(); } + /// Gets the native handle of the SYCL event. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 71eb1c4e9464e..bb880ce9242c1 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -83,21 +83,21 @@ program_impl::program_impl( } program_impl::program_impl(ContextImplPtr Context, - program_interop_handle_t InteropProgram) + pi_native_handle InteropProgram) : program_impl(Context, InteropProgram, nullptr) {} program_impl::program_impl(ContextImplPtr Context, - program_interop_handle_t InteropProgram, + pi_native_handle InteropProgram, RT::PiProgram Program) : MProgram(Program), MContext(Context), MLinkable(true) { const detail::plugin &Plugin = getPlugin(); if (MProgram == nullptr) { - assert(InteropProgram != nullptr && - "No InteropProgram/PiProgram defined with piextProgramConvert"); + assert(InteropProgram && + "No InteropProgram/PiProgram defined with piextProgramFromNative"); // Translate the raw program handle into PI program. - Plugin.call( - Context->getHandleRef(), &MProgram, (void **)&InteropProgram); + Plugin.call(InteropProgram, + &MProgram); } else Plugin.call(Program); @@ -158,7 +158,7 @@ program_impl::program_impl(ContextImplPtr Context, } program_impl::program_impl(ContextImplPtr Context, RT::PiKernel Kernel) - : program_impl(Context, nullptr, + : program_impl(Context, reinterpret_cast(nullptr), ProgramManager::getInstance().getPiProgramFromPiKernel( Kernel, Context)) {} diff --git a/sycl/source/detail/program_impl.hpp b/sycl/source/detail/program_impl.hpp index ddf79492f2f3d..3c172d706e84c 100644 --- a/sycl/source/detail/program_impl.hpp +++ b/sycl/source/detail/program_impl.hpp @@ -31,11 +31,6 @@ namespace detail { using ContextImplPtr = std::shared_ptr; -// TODO: SYCL BE generalization will change this to something better. -// For now this saves us from unwanted implicit casts. -struct _program_interop_handle_t; -using program_interop_handle_t = _program_interop_handle_t *; - class program_impl { public: program_impl() = delete; @@ -89,7 +84,7 @@ class program_impl { /// \param Context is a pointer to SYCL context impl. /// \param InteropProgram is an instance of plugin interface interoperability /// program. - program_impl(ContextImplPtr Context, program_interop_handle_t InteropProgram); + program_impl(ContextImplPtr Context, pi_native_handle InteropProgram); /// Constructs a program instance from plugin interface interoperability /// kernel. @@ -305,7 +300,7 @@ class program_impl { private: // Deligating Constructor used in Implementation. - program_impl(ContextImplPtr Context, program_interop_handle_t InteropProgram, + program_impl(ContextImplPtr Context, pi_native_handle InteropProgram, RT::PiProgram Program); /// Checks feature support for specific devices. /// diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 8a504c9d03438..0758519720fff 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -185,6 +185,13 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { #endif } +pi_native_handle queue_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(MCommandQueue, &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index faf58914f2354..e48d59694af27 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -346,6 +346,11 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } + /// Gets the native handle of the SYCL queue. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: /// Performs command group submission to the queue. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index bb783468ad604..0ba4d01462b7e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1772,12 +1772,13 @@ cl_int ExecCGCommand::enqueueImp() { ReqMemObjs.emplace_back(ReqToMem); }); - auto interop_queue = MQueue->get(); std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); - interop_handler InteropHandler(std::move(ReqMemObjs), interop_queue); + interop_handler InteropHandler(std::move(ReqMemObjs), MQueue); ExecInterop->MInteropTask->call(InteropHandler); - Plugin.call(MQueue->getHandleRef(), 0, nullptr, &Event); - Plugin.call(reinterpret_cast(interop_queue)); + Plugin.call(MQueue->getHandleRef(), 0, + nullptr, &Event); + Plugin.call( + reinterpret_cast(MQueue->get())); return CL_SUCCESS; } case CG::CGTYPE::NONE: diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 8d28be7ac855b..d08fa5a63922e 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -30,8 +30,11 @@ device::device() : impl(std::make_shared()) {} device::device(cl_device_id deviceId) : impl(std::make_shared( - detail::pi::cast(deviceId), - *RT::GlobalPlugin)) {} + detail::pi::cast(deviceId), *RT::GlobalPlugin)) { + // The implementation constructor takes ownership of the native handle so we + // must retain it in order to adhere to SYCL 1.2.1 spec (Rev6, section 4.3.1.) + clRetainDevice(deviceId); +} device::device(const device_selector &deviceSelector) { *this = deviceSelector.select_device(); @@ -122,5 +125,7 @@ device::get_info() const { #undef PARAM_TRAITS_SPEC +pi_native_handle device::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 900a6a06a4af9..220c68eaa36c2 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -85,5 +85,7 @@ event::event(shared_ptr_class event_impl) #undef PARAM_TRAITS_SPEC +pi_native_handle event::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/program.cpp b/sycl/source/program.cpp index 4e24ee9088929..6884e91cc566b 100644 --- a/sycl/source/program.cpp +++ b/sycl/source/program.cpp @@ -30,7 +30,11 @@ program::program(vector_class programList, string_class linkOptions) { program::program(const context &context, cl_program clProgram) : impl(std::make_shared( detail::getSyclObjImpl(context), - detail::pi::cast(clProgram))) {} + detail::pi::cast(clProgram))) { + // The implementation constructor takes ownership of the native handle so we + // must retain it in order to adhere to SYCL 1.2.1 spec (Rev6, section 4.3.1.) + clRetainProgram(clProgram); +} program::program(std::shared_ptr impl) : impl(impl) {} cl_program program::get() const { return impl->get(); } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 1ca3605eb494e..ab4da16014beb 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -151,5 +151,8 @@ queue::get_property() const; bool queue::is_in_order() const { return impl->has_property(); } + +pi_native_handle queue::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/unittests/pi/cuda/CMakeLists.txt b/sycl/unittests/pi/cuda/CMakeLists.txt index a01c771af5d9b..259b2fa9e517f 100644 --- a/sycl/unittests/pi/cuda/CMakeLists.txt +++ b/sycl/unittests/pi/cuda/CMakeLists.txt @@ -3,6 +3,7 @@ add_sycl_unittest(PiCudaTests OBJECT test_base_objects.cpp test_commands.cpp test_device.cpp + test_interop_get_native.cpp test_kernels.cpp test_mem_obj.cpp test_primary_context.cpp diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index 3c4f8888a4bc2..34c361a5c43f1 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp new file mode 100644 index 0000000000000..1aec0b4c26ab0 --- /dev/null +++ b/sycl/unittests/pi/cuda/test_interop_get_native.cpp @@ -0,0 +1,97 @@ +//==------- test_interop_get_native.cpp - SYCL CUDA get_native tests -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include +#include +#include +#include + +using namespace cl::sycl; + +struct DISABLED_CudaInteropGetNativeTests : public ::testing::Test { + +protected: + queue syclQueue_; + context syclContext_; + device syclDevice_; + + static bool isCudaDevice(const device &dev) { + const platform platform = dev.get_info(); + const std::string platformVersion = + platform.get_info(); + // If using PI_CUDA, don't accept a non-CUDA device + return platformVersion.find("CUDA") != std::string::npos; + } + + class cuda_device_selector : public device_selector { + public: + int operator()(const device &dev) const { + return isCudaDevice(dev) ? 1 : -1; + } + }; + + void SetUp() override { + syclQueue_ = queue{cuda_device_selector()}; + context syclContext_ = syclQueue_.get_context(); + device syclDevice_ = syclQueue_.get_device(); + ASSERT_TRUE(isCudaDevice(syclDevice_)); + } + + void TearDown() override {} +}; + +TEST_F(DISABLED_CudaInteropGetNativeTests, getNativeDevice) { + CUdevice cudaDevice = get_native(syclDevice_); + char cudaDeviceName[2] = {0, 0}; + CUresult result = cuDeviceGetName(cudaDeviceName, 2, cudaDevice); + ASSERT_EQ(result, CUDA_SUCCESS); + ASSERT_NE(cudaDeviceName[0], 0); +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, getNativeContext) { + CUcontext cudaContext = get_native(syclContext_); + ASSERT_NE(cudaContext, nullptr); +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, getNativeQueue) { + CUstream cudaStream = get_native(syclQueue_); + ASSERT_NE(cudaStream, nullptr); + + CUcontext streamContext = nullptr; + CUresult result = cuStreamGetCtx(cudaStream, &streamContext); + ASSERT_EQ(result, CUDA_SUCCESS); + + CUcontext cudaContext = get_native(syclContext_); + ASSERT_EQ(streamContext, cudaContext); +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, interopTaskGetMem) { + buffer syclBuffer(range<1>{1}); + syclQueue_.submit([&](cl::sycl::handler &cgh) { + auto syclAccessor = syclBuffer.get_access(cgh); + cgh.interop_task([=](sycl::interop_handler ih) { + CUdeviceptr cudaPtr = ih.get_mem(syclAccessor); + CUdeviceptr cudaPtrBase; + size_t cudaPtrSize = 0; + cuMemGetAddressRange(&cudaPtrBase, &cudaPtrSize, cudaPtr); + ASSERT_EQ(cudaPtrSize, sizeof(int)); + }); + }); +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, interopTaskGetBufferMem) { + CUstream cudaStream = get_native(syclQueue_); + syclQueue_.submit([&](cl::sycl::handler &cgh) { + cgh.interop_task([=](sycl::interop_handler ih) { + CUstream cudaInteropStream = ih.get_queue(); + ASSERT_EQ(cudaInteropStream, cudaStream); + }); + }); +} diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp index 3715da83b68e8..8adf994ae2bdc 100644 --- a/sycl/unittests/pi/cuda/test_mem_obj.cpp +++ b/sycl/unittests/pi/cuda/test_mem_obj.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp index 199765debeeed..8e387cb4a2536 100644 --- a/sycl/unittests/pi/cuda/test_primary_context.cpp +++ b/sycl/unittests/pi/cuda/test_primary_context.cpp @@ -12,7 +12,6 @@ #include #include -#include #include #include @@ -20,61 +19,47 @@ using namespace cl::sycl; -void check(bool condition, const char *conditionString, const char *filename, - const long line) noexcept { - if (!condition) { - std::cerr << "CHECK failed in " << filename << "#" << line << " " - << conditionString << "\n"; - std::abort(); - } -} - -#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__) - -bool isCudaDevice(const device &dev) { - const platform platform = dev.get_info(); - const std::string platformVersion = - platform.get_info(); - // If using PI_CUDA, don't accept a non-CUDA device - return platformVersion.find("CUDA") != std::string::npos; -} - -class cuda_device_selector : public device_selector { -public: - int operator()(const device &dev) const { return isCudaDevice(dev) ? 1 : -1; } -}; - -class other_cuda_device_selector : public device_selector { -public: - other_cuda_device_selector(const device &dev) : excludeDevice{dev} {} - - int operator()(const device &dev) const { - if (!isCudaDevice(dev)) { - return -1; - } - if (dev.get() == excludeDevice.get()) { - // Return only this device if it is the only available - return 0; - } - return 1; - } - -private: - const device &excludeDevice; -}; - -using namespace cl::sycl; - struct DISABLED_CudaPrimaryContextTests : public ::testing::Test { protected: - std::vector Plugins; - - pi_platform platform_; device deviceA_; device deviceB_; context context_; + static bool isCudaDevice(const device &dev) { + const platform platform = dev.get_info(); + const std::string platformVersion = + platform.get_info(); + // If using PI_CUDA, don't accept a non-CUDA device + return platformVersion.find("CUDA") != std::string::npos; + } + + class cuda_device_selector : public device_selector { + public: + int operator()(const device &dev) const { + return isCudaDevice(dev) ? 1 : -1; + } + }; + + class other_cuda_device_selector : public device_selector { + public: + other_cuda_device_selector(const device &dev) : excludeDevice{dev} {} + + int operator()(const device &dev) const { + if (!isCudaDevice(dev)) { + return -1; + } + if (dev.get() == excludeDevice.get()) { + // Return only this device if it is the only available + return 0; + } + return 1; + } + + private: + const device &excludeDevice; + }; + void SetUp() override { try { @@ -96,8 +81,8 @@ TEST_F(DISABLED_CudaPrimaryContextTests, piSingleContext) { std::cout << "create single context" << std::endl; context Context(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); - CUdevice CudaDevice = reinterpret_cast(deviceA_.get())->get(); - CUcontext CudaContext = reinterpret_cast(Context.get())->get(); + CUdevice CudaDevice = deviceA_.get_native(); + CUcontext CudaContext = Context.get_native(); CUcontext PrimaryCudaContext; cuDevicePrimaryCtxRetain(&PrimaryCudaContext, CudaDevice); @@ -112,22 +97,23 @@ TEST_F(DISABLED_CudaPrimaryContextTests, piMultiContextSingleDevice) { context ContextA(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); context ContextB(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); - CUcontext CudaContextA = reinterpret_cast(ContextA.get())->get(); - CUcontext CudaContextB = reinterpret_cast(ContextB.get())->get(); + CUcontext CudaContextA = ContextA.get_native(); + CUcontext CudaContextB = ContextB.get_native(); ASSERT_EQ(CudaContextA, CudaContextB); } TEST_F(DISABLED_CudaPrimaryContextTests, piMultiContextMultiDevice) { - if (isCudaDevice(deviceB_) && deviceA_.get() != deviceB_.get()) { + CUdevice CudaDeviceA = deviceA_.get_native(); + CUdevice CudaDeviceB = deviceB_.get_native(); + + if (isCudaDevice(deviceB_) && CudaDeviceA != CudaDeviceB) { std::cout << "create multiple contexts for multiple devices" << std::endl; context ContextA(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); context ContextB(deviceB_, async_handler{}, /*UsePrimaryContext=*/true); - CUcontext CudaContextA = - reinterpret_cast(ContextA.get())->get(); - CUcontext CudaContextB = - reinterpret_cast(ContextB.get())->get(); + CUcontext CudaContextA = ContextA.get_native(); + CUcontext CudaContextB = ContextB.get_native(); ASSERT_NE(CudaContextA, CudaContextB); } diff --git a/sycl/unittests/pi/cuda/test_queue.cpp b/sycl/unittests/pi/cuda/test_queue.cpp index 38de62ec2dd71..ca983e4c55c79 100644 --- a/sycl/unittests/pi/cuda/test_queue.cpp +++ b/sycl/unittests/pi/cuda/test_queue.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include