Skip to content

Commit 438b39a

Browse files
Jie1zhang and alexdeucher
authored and committed
drm/amdkfd: pause autosuspend when creating pdd
When using MES, creating a pdd requires talking to the GPU to set up the relevant context. The code here forgot to wake up the GPU in case it was suspended; this causes KVM to return EFAULT for a passthrough GPU, for example. This issue can be masked if the GPU was woken up by something else first (e.g. opening the KMS node) and has not yet gone back to sleep.

v4: do the allocation of proc_ctx_bo in a lazy fashion when the first queue is created in a process (Felix)

Signed-off-by: Jesse Zhang <[email protected]>
Reviewed-by: Yunxiang Li <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
Cc: [email protected]
1 parent f4df208 commit 438b39a

File tree

2 files changed

+17
-21
lines changed

2 files changed

+17
-21
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

+15
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
207207
if (!down_read_trylock(&adev->reset_domain->sem))
208208
return -EIO;
209209

210+
if (!pdd->proc_ctx_cpu_ptr) {
211+
r = amdgpu_amdkfd_alloc_gtt_mem(adev,
212+
AMDGPU_MES_PROC_CTX_SIZE,
213+
&pdd->proc_ctx_bo,
214+
&pdd->proc_ctx_gpu_addr,
215+
&pdd->proc_ctx_cpu_ptr,
216+
false);
217+
if (r) {
218+
dev_err(adev->dev,
219+
"failed to allocate process context bo\n");
220+
return r;
221+
}
222+
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
223+
}
224+
210225
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
211226
queue_input.process_id = qpd->pqm->process->pasid;
212227
queue_input.page_table_base_addr = qpd->page_table_base;

drivers/gpu/drm/amd/amdkfd/kfd_process.c

+2-21
Original file line numberDiff line numberDiff line change
@@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
10761076

10771077
kfd_free_process_doorbells(pdd->dev->kfd, pdd);
10781078

1079-
if (pdd->dev->kfd->shared_resources.enable_mes)
1079+
if (pdd->dev->kfd->shared_resources.enable_mes &&
1080+
pdd->proc_ctx_cpu_ptr)
10801081
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
10811082
&pdd->proc_ctx_bo);
10821083
/*
@@ -1608,7 +1609,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
16081609
struct kfd_process *p)
16091610
{
16101611
struct kfd_process_device *pdd = NULL;
1611-
int retval = 0;
16121612

16131613
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
16141614
return NULL;
@@ -1632,21 +1632,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
16321632
pdd->user_gpu_id = dev->id;
16331633
atomic64_set(&pdd->evict_duration_counter, 0);
16341634

1635-
if (dev->kfd->shared_resources.enable_mes) {
1636-
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
1637-
AMDGPU_MES_PROC_CTX_SIZE,
1638-
&pdd->proc_ctx_bo,
1639-
&pdd->proc_ctx_gpu_addr,
1640-
&pdd->proc_ctx_cpu_ptr,
1641-
false);
1642-
if (retval) {
1643-
dev_err(dev->adev->dev,
1644-
"failed to allocate process context bo\n");
1645-
goto err_free_pdd;
1646-
}
1647-
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
1648-
}
1649-
16501635
p->pdds[p->n_pdds++] = pdd;
16511636
if (kfd_dbg_is_per_vmid_supported(pdd->dev))
16521637
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1658,10 +1643,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
16581643
idr_init(&pdd->alloc_idr);
16591644

16601645
return pdd;
1661-
1662-
err_free_pdd:
1663-
kfree(pdd);
1664-
return NULL;
16651646
}
16661647

16671648
/**

0 commit comments

Comments
 (0)