
Commit e0f8265

mbrost05 authored and Thomas Hellström committed
drm/xe: Trigger queue cleanup if not in wedged mode 2
The intent of wedging a device is to allow queues to continue running only in wedged mode 2. In other modes, queues should initiate cleanup and signal all remaining fences. Fix xe_guc_submit_wedge to correctly clean up queues when wedged mode != 2.

Fixes: 7dbe8af ("drm/xe: Wedge the entire device")
Cc: stable@vger.kernel.org
Reviewed-by: Zhanjun Dong <zhanjun.dong@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260310225039.1320161-4-zhanjun.dong@intel.com
(cherry picked from commit e25ba41)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
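At a glance, the behavioural change, as a minimal C sketch distilled from the diff below. The two helper names are hypothetical stand-ins for the branch bodies, not functions that exist in the driver:

void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	/*
	 * Before this patch, cleanup was unconditionally deferred to a
	 * devm action, so in modes other than 2 the queues were never
	 * torn down and their fences were never signalled.
	 */
	if (xe->wedged.mode == 2)
		defer_cleanup_and_mark_queues_wedged(guc);	/* hypothetical */
	else
		kill_queues_and_signal_fences(guc);		/* hypothetical */
}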
1 parent fb37386

1 file changed: drivers/gpu/drm/xe/xe_guc_submit.c (22 additions & 13 deletions)
@@ -1271,6 +1271,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
  */
 void xe_guc_submit_wedge(struct xe_guc *guc)
 {
+	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_exec_queue *q;
 	unsigned long index;
@@ -1285,20 +1286,28 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
 	if (!guc->submission_state.initialized)
 		return;
 
-	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
-				       guc_submit_wedged_fini, guc);
-	if (err) {
-		xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; "
-			  "Although device is wedged.\n",
-			  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
-		return;
-	}
+	if (xe->wedged.mode == 2) {
+		err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
+					       guc_submit_wedged_fini, guc);
+		if (err) {
+			xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
+				  "Although device is wedged.\n");
+			return;
+		}
 
-	mutex_lock(&guc->submission_state.lock);
-	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
-		if (xe_exec_queue_get_unless_zero(q))
-			set_exec_queue_wedged(q);
-	mutex_unlock(&guc->submission_state.lock);
+		mutex_lock(&guc->submission_state.lock);
+		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+			if (xe_exec_queue_get_unless_zero(q))
+				set_exec_queue_wedged(q);
+		mutex_unlock(&guc->submission_state.lock);
+	} else {
+		/* Forcefully kill any remaining exec queues, signal fences */
+		guc_submit_reset_prepare(guc);
+		xe_guc_submit_stop(guc);
+		xe_guc_softreset(guc);
+		xe_uc_fw_sanitize(&guc->fw);
+		xe_guc_submit_pause_abort(guc);
+	}
 }
 
 static bool guc_submit_hint_wedged(struct xe_guc *guc)
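For convenience, here is how the function reads with the patch applied, reconstructed from the hunks above. The declaration of err, and anything else between the two hunks, is not visible in the diff, so its placement here is assumed; the inline comments are editorial glosses on the two paths:

void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;	/* assumed: declared in context elided by the diff */

	/* ... context elided by the diff ... */

	if (!guc->submission_state.initialized)
		return;

	if (xe->wedged.mode == 2) {
		/* Mode 2: leave queues running; clean up at device teardown */
		err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
					       guc_submit_wedged_fini, guc);
		if (err) {
			xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
				  "Although device is wedged.\n");
			return;
		}

		/* Mark every live exec queue as wedged so it keeps its state */
		mutex_lock(&guc->submission_state.lock);
		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
			if (xe_exec_queue_get_unless_zero(q))
				set_exec_queue_wedged(q);
		mutex_unlock(&guc->submission_state.lock);
	} else {
		/* Forcefully kill any remaining exec queues, signal fences */
		guc_submit_reset_prepare(guc);
		xe_guc_submit_stop(guc);
		xe_guc_softreset(guc);
		xe_uc_fw_sanitize(&guc->fw);
		xe_guc_submit_pause_abort(guc);
	}
}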
