@@ -176,7 +176,7 @@ void LlvmRuntimeExecutor::print_list_manager_info(void *list_manager,
176
176
void LlvmRuntimeExecutor::synchronize () {
177
177
if (config_->arch == Arch::cuda) {
178
178
#if defined(TI_WITH_CUDA)
179
- CUDAContext::get_instance ().make_current ();
179
+ auto context_guard = CUDAContext::get_instance ().get_guard ();
180
180
CUDADriver::get_instance ().stream_synchronize (nullptr );
181
181
#else
182
182
TI_ERROR (" No CUDA support" );
@@ -191,7 +191,7 @@ uint64 LlvmRuntimeExecutor::fetch_result_uint64(int i, uint64 *result_buffer) {
191
191
uint64 ret;
192
192
if (config_->arch == Arch::cuda) {
193
193
#if defined(TI_WITH_CUDA)
194
- CUDAContext::get_instance ().make_current ();
194
+ auto context_guard = CUDAContext::get_instance ().get_guard ();
195
195
CUDADriver::get_instance ().memcpy_device_to_host (&ret, result_buffer + i,
196
196
sizeof (uint64));
197
197
#else
@@ -373,6 +373,7 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes(
373
373
result_buffer);
374
374
if (config_->arch == Arch::cuda) {
375
375
#if defined(TI_WITH_CUDA)
376
+ auto context_guard = CUDAContext::get_instance ().get_guard ();
376
377
CUDADriver::get_instance ().memset (root_buffer, 0 , rounded_size);
377
378
#else
378
379
TI_NOT_IMPLEMENTED
@@ -476,6 +477,7 @@ void LlvmRuntimeExecutor::fill_ndarray(const DeviceAllocation &alloc,
476
477
auto ptr = get_ndarray_alloc_info_ptr (alloc);
477
478
if (config_->arch == Arch::cuda) {
478
479
#if defined(TI_WITH_CUDA)
480
+ auto cuda_context = CUDAContext::get_instance ().get_guard ();
479
481
CUDADriver::get_instance ().memsetd32 ((void *)ptr, data, size);
480
482
#else
481
483
TI_NOT_IMPLEMENTED
@@ -515,9 +517,12 @@ void LlvmRuntimeExecutor::materialize_runtime(MemoryPool *memory_pool,
515
517
TaichiLLVMContext *tlctx = nullptr ;
516
518
if (config_->arch == Arch::cuda) {
517
519
#if defined(TI_WITH_CUDA)
518
- CUDADriver::get_instance ().malloc (
519
- (void **)result_buffer_ptr,
520
- sizeof (uint64) * taichi_result_buffer_entries);
520
+ {
521
+ auto context_guard = CUDAContext::get_instance ().get_guard ();
522
+ CUDADriver::get_instance ().malloc (
523
+ (void **)result_buffer_ptr,
524
+ sizeof (uint64) * taichi_result_buffer_entries);
525
+ }
521
526
const auto total_mem = runtime_mem_info_->get_total_memory ();
522
527
if (config_->device_memory_fraction == 0 ) {
523
528
TI_ASSERT (config_->device_memory_GB > 0 );
@@ -537,9 +542,11 @@ void LlvmRuntimeExecutor::materialize_runtime(MemoryPool *memory_pool,
537
542
cuda::CudaDevice::AllocInfo preallocated_device_buffer_alloc_info =
538
543
cuda_device ()->get_alloc_info (preallocated_device_buffer_alloc_);
539
544
preallocated_device_buffer_ = preallocated_device_buffer_alloc_info.ptr ;
540
-
541
- CUDADriver::get_instance ().memset (preallocated_device_buffer_, 0 ,
542
- prealloc_size);
545
+ {
546
+ auto context_guard = CUDAContext::get_instance ().get_guard ();
547
+ CUDADriver::get_instance ().memset (preallocated_device_buffer_, 0 ,
548
+ prealloc_size);
549
+ }
543
550
tlctx = llvm_context_device_.get ();
544
551
#else
545
552
TI_NOT_IMPLEMENTED
@@ -612,11 +619,6 @@ void LlvmRuntimeExecutor::materialize_runtime(MemoryPool *memory_pool,
612
619
" LLVMRuntime_set_profiler_stop" , llvm_runtime_,
613
620
(void *)&KernelProfilerBase::profiler_stop);
614
621
}
615
- #if defined(TI_WITH_CUDA)
616
- if (config_->arch == Arch::cuda) {
617
- CUDADriver::get_instance ().context_pop_current (nullptr );
618
- }
619
- #endif
620
622
}
621
623
622
624
void LlvmRuntimeExecutor::destroy_snode_tree (SNodeTree *snode_tree) {
0 commit comments