drm/amdkfd: fix suspend/resume all calls in mes based eviction path

Suspend/resume all gangs should be done while the device lock is held.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Harish Kasiviswanathan <harish.kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Jonathan Kim
2025-06-18 10:31:15 -04:00
committed by Alex Deucher
parent 277bb0f83e
commit 079ae5118e

View File

@@ -1209,6 +1209,15 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
pr_debug_ratelimited("Evicting process pid %d queues\n",
pdd->process->lead_thread->pid);
if (dqm->dev->kfd->shared_resources.enable_mes) {
pdd->last_evict_timestamp = get_jiffies_64();
retval = suspend_all_queues_mes(dqm);
if (retval) {
dev_err(dev, "Suspending all queues failed");
goto out;
}
}
/* Mark all queues as evicted. Deactivate all active queues on
* the qpd.
*/
@@ -1221,23 +1230,27 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
decrement_queue_count(dqm, qpd, q);
if (dqm->dev->kfd->shared_resources.enable_mes) {
int err;
err = remove_queue_mes(dqm, q, qpd);
if (err) {
retval = remove_queue_mes(dqm, q, qpd);
if (retval) {
dev_err(dev, "Failed to evict queue %d\n",
q->properties.queue_id);
retval = err;
goto out;
}
}
}
pdd->last_evict_timestamp = get_jiffies_64();
if (!dqm->dev->kfd->shared_resources.enable_mes)
if (!dqm->dev->kfd->shared_resources.enable_mes) {
pdd->last_evict_timestamp = get_jiffies_64();
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
USE_DEFAULT_GRACE_PERIOD);
} else {
retval = resume_all_queues_mes(dqm);
if (retval)
dev_err(dev, "Resuming all queues failed");
}
out:
dqm_unlock(dqm);
@@ -3098,61 +3111,17 @@ out:
return ret;
}
/*
 * Evict the queues described by @qpd; kfd_evict_process_device() calls this
 * when shared_resources.enable_mes is set.
 *
 * If @qpd is already evicted, only its evicted count is incremented and 0 is
 * returned. Otherwise all queues are suspended, the process queues are
 * evicted through dqm->ops.evict_process_queues(), and all queues are
 * resumed.
 *
 * The dqm lock is held only around the qpd->evicted check; the suspend and
 * resume calls are made after dqm_unlock().
 *
 * Returns 0 on success, or the non-zero value from the first step that
 * failed. If suspending or evicting fails, the function returns without
 * calling resume_all_queues_mes().
 */
static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct device *dev = dqm->dev->adev->dev;
int ret = 0;
/* Check if process is already evicted */
dqm_lock(dqm);
if (qpd->evicted) {
/* Increment the evicted count to make sure the
 * process stays evicted before it is terminated.
 */
qpd->evicted++;
dqm_unlock(dqm);
goto out;
}
dqm_unlock(dqm);
/* From here on the dqm lock is no longer held by this function. */
ret = suspend_all_queues_mes(dqm);
if (ret) {
dev_err(dev, "Suspending all queues failed");
goto out;
}
ret = dqm->ops.evict_process_queues(dqm, qpd);
if (ret) {
dev_err(dev, "Evicting process queues failed");
/* Error path skips the resume call below. */
goto out;
}
ret = resume_all_queues_mes(dqm);
if (ret)
dev_err(dev, "Resuming all queues failed");
out:
return ret;
}
int kfd_evict_process_device(struct kfd_process_device *pdd)
{
struct device_queue_manager *dqm;
struct kfd_process *p;
int ret = 0;
p = pdd->process;
dqm = pdd->dev->dqm;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
if (dqm->dev->kfd->shared_resources.enable_mes)
ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd);
else
ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
return ret;
return dqm->ops.evict_process_queues(dqm, &pdd->qpd);
}
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,