Skip to content

Commit 05762d9

Browse files
jcornwallAMD authored and alexdeucher committed
drm/amdkfd: gfx12.1 trap handler instruction fixup for VOP3PX
A trap may occur in the middle of a VOP3PX instruction co-issue. The PC would be restored incorrectly if left unmodified. Identify this case by examining the instruction opcode and rewind the PC 8 bytes if it occurs.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Reviewed-by: Vladimir Indic <vladimir.indic@amd.com>
Cc: Shweta Khatri <shweta.khatri@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 1197366 commit 05762d9

2 files changed

Lines changed: 121 additions & 101 deletions

File tree

drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h

Lines changed: 103 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -4587,14 +4587,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
45874587
};
45884588

45894589
static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
4590-
0xbfa00001, 0xbfa003ac,
4590+
0xbfa00001, 0xbfa003be,
45914591
0xb0804009, 0xb8f8f804,
45924592
0x9178ff78, 0x00008c00,
45934593
0xb8fbf811, 0x8b6eff78,
45944594
0x00004000, 0xbfa10008,
45954595
0x8b6eff7b, 0x00000080,
45964596
0xbfa20018, 0x8b6ea07b,
4597-
0xbfa200d1, 0xbf830010,
4597+
0xbfa200da, 0xbf830010,
45984598
0xb8fbf811, 0xbfa0fffb,
45994599
0x8b6eff7b, 0x00000bd0,
46004600
0xbfa20010, 0xb8eef812,
@@ -4605,7 +4605,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
46054605
0xf0000000, 0xbfa20005,
46064606
0x8b6fff6f, 0x00000200,
46074607
0xbfa20002, 0x8b6ea07b,
4608-
0xbfa200bb, 0x9177ff77,
4608+
0xbfa200c4, 0x9177ff77,
46094609
0x007fc000, 0xb8fa04a1,
46104610
0x847a967a, 0x8c777a77,
46114611
0xb8fa0421, 0x847a957a,
@@ -4632,43 +4632,46 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
46324632
0xbfa00002, 0x806c846c,
46334633
0x826d806d, 0x8b6dff6d,
46344634
0x01ffffff, 0xb8fbf811,
4635-
0xbf0d847b, 0xbfa20078,
4635+
0xbf0d847b, 0xbfa20081,
46364636
0xf4003eb6, 0xf8000000,
46374637
0xbfc70000, 0xf4003bb6,
46384638
0xf8000008, 0x8b76ff7a,
46394639
0x80000000, 0xbfa20027,
46404640
0x9376ff7a, 0x00060019,
46414641
0x81f9a376, 0xbf0b8179,
4642-
0xbfa20068, 0x81f9ac76,
4643-
0xbf0b8179, 0xbfa20062,
4642+
0xbfa2006e, 0x81f9ac76,
4643+
0xbf0b8179, 0xbfa20068,
46444644
0x81f9b776, 0xbf0b8179,
4645-
0xbfa2005f, 0x8b76ff7a,
4645+
0xbfa20065, 0x8b76ff7a,
46464646
0x000001ff, 0xbf06ff76,
4647-
0x000000fe, 0xbfa2005d,
4647+
0x000000fe, 0xbfa20063,
46484648
0xbf06ff76, 0x000000ff,
4649-
0xbfa20057, 0xbf06ff76,
4650-
0x000000fa, 0xbfa20054,
4649+
0xbfa2005d, 0xbf06ff76,
4650+
0x000000fa, 0xbfa2005a,
46514651
0x81f9ff76, 0x000000e9,
4652-
0xbf0b8179, 0xbfa20050,
4652+
0xbf0b8179, 0xbfa20056,
46534653
0x8b76ff7b, 0xffff0000,
46544654
0xbf06ff76, 0xbf860000,
4655-
0xbfa10051, 0x9376ff7b,
4655+
0xbfa1005a, 0x9376ff7b,
46564656
0x0002000e, 0x8b79ff7b,
46574657
0x00003f00, 0x85798679,
46584658
0x8c767976, 0xb9763b01,
4659-
0xbfa00049, 0x8b76ff7a,
4659+
0xbfa00052, 0x8b76ff7a,
46604660
0xfc000000, 0xbf06ff76,
4661-
0xd4000000, 0xbfa20013,
4661+
0xd4000000, 0xbfa20019,
46624662
0xbf06ff76, 0xc8000000,
4663-
0xbfa20027, 0x8b76ff7a,
4663+
0xbfa2002d, 0x8b76ff7a,
46644664
0xff000000, 0xbf06ff76,
4665-
0xcf000000, 0xbfa20039,
4665+
0xcf000000, 0xbfa2003f,
46664666
0x8b79ff7a, 0xffff0000,
4667+
0xbf06ff79, 0xcc330000,
4668+
0xbfa2003d, 0xbf06ff79,
4669+
0xcc880000, 0xbfa2003a,
46674670
0xbf06ff79, 0xcc350000,
4668-
0xbfa20037, 0xbf06ff79,
4669-
0xcc3a0000, 0xbfa20034,
4671+
0xbfa2003a, 0xbf06ff79,
4672+
0xcc3a0000, 0xbfa20037,
46704673
0xbf06ff76, 0xcc000000,
4671-
0xbfa10031, 0x8b76ff7b,
4674+
0xbfa10034, 0x8b76ff7b,
46724675
0x000001ff, 0xbf06ff76,
46734676
0x000000ff, 0xbfa20029,
46744677
0xbf06ff76, 0x000000fa,
@@ -4691,86 +4694,92 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
46914694
0x000001ff, 0xbf06ff76,
46924695
0x000000ff, 0xbfa20003,
46934696
0xbfc70000, 0xbefb006e,
4694-
0xbfa0ffad, 0xbfc70000,
4695-
0xbefb006f, 0xbfa0ffaa,
4696-
0xbfc70000, 0x857a9677,
4697-
0xb97a04a1, 0x857a9577,
4698-
0xb97a0421, 0x857a8e77,
4699-
0xb97a3021, 0x8bfe7e7e,
4700-
0x8bea6a6a, 0x85788978,
4701-
0xb9783244, 0xbe804a6c,
4702-
0xb8faf802, 0xbf0d987a,
4703-
0xbfa10001, 0xbfb00000,
4704-
0x8b6dff6d, 0x01ffffff,
4705-
0xbefa0080, 0xb97a0151,
4706-
0x9177ff77, 0x007fc000,
4707-
0xb8fa04a1, 0x847a967a,
4708-
0x8c777a77, 0xb8fa0421,
4709-
0x847a957a, 0x8c777a77,
4710-
0xb8fa3021, 0x847a8e7a,
4711-
0x8c777a77, 0xb980f821,
4712-
0x00000000, 0xbf0d847b,
4713-
0xbfa20078, 0xf4003eb6,
4714-
0xf8000000, 0xbfc70000,
4715-
0xf4003bb6, 0xf8000008,
4716-
0x8b76ff7a, 0x80000000,
4717-
0xbfa20027, 0x9376ff7a,
4718-
0x00060019, 0x81f9a376,
4697+
0xbfa0ffa7, 0xbfc70000,
4698+
0xbefb006f, 0xbfa0ffa4,
4699+
0x80ec886c, 0x82ed806d,
4700+
0xbfa0fff7, 0xbfc70000,
4701+
0x857a9677, 0xb97a04a1,
4702+
0x857a9577, 0xb97a0421,
4703+
0x857a8e77, 0xb97a3021,
4704+
0x8bfe7e7e, 0x8bea6a6a,
4705+
0x85788978, 0xb9783244,
4706+
0xbe804a6c, 0xb8faf802,
4707+
0xbf0d987a, 0xbfa10001,
4708+
0xbfb00000, 0x8b6dff6d,
4709+
0x01ffffff, 0xbefa0080,
4710+
0xb97a0151, 0x9177ff77,
4711+
0x007fc000, 0xb8fa04a1,
4712+
0x847a967a, 0x8c777a77,
4713+
0xb8fa0421, 0x847a957a,
4714+
0x8c777a77, 0xb8fa3021,
4715+
0x847a8e7a, 0x8c777a77,
4716+
0xb980f821, 0x00000000,
4717+
0xbf0d847b, 0xbfa20081,
4718+
0xf4003eb6, 0xf8000000,
4719+
0xbfc70000, 0xf4003bb6,
4720+
0xf8000008, 0x8b76ff7a,
4721+
0x80000000, 0xbfa20027,
4722+
0x9376ff7a, 0x00060019,
4723+
0x81f9a376, 0xbf0b8179,
4724+
0xbfa2006e, 0x81f9ac76,
47194725
0xbf0b8179, 0xbfa20068,
4720-
0x81f9ac76, 0xbf0b8179,
4721-
0xbfa20062, 0x81f9b776,
4722-
0xbf0b8179, 0xbfa2005f,
4723-
0x8b76ff7a, 0x000001ff,
4724-
0xbf06ff76, 0x000000fe,
4726+
0x81f9b776, 0xbf0b8179,
4727+
0xbfa20065, 0x8b76ff7a,
4728+
0x000001ff, 0xbf06ff76,
4729+
0x000000fe, 0xbfa20063,
4730+
0xbf06ff76, 0x000000ff,
47254731
0xbfa2005d, 0xbf06ff76,
4726-
0x000000ff, 0xbfa20057,
4732+
0x000000fa, 0xbfa2005a,
4733+
0x81f9ff76, 0x000000e9,
4734+
0xbf0b8179, 0xbfa20056,
4735+
0x8b76ff7b, 0xffff0000,
4736+
0xbf06ff76, 0xbf860000,
4737+
0xbfa1005a, 0x9376ff7b,
4738+
0x0002000e, 0x8b79ff7b,
4739+
0x00003f00, 0x85798679,
4740+
0x8c767976, 0xb9763b01,
4741+
0xbfa00052, 0x8b76ff7a,
4742+
0xfc000000, 0xbf06ff76,
4743+
0xd4000000, 0xbfa20019,
4744+
0xbf06ff76, 0xc8000000,
4745+
0xbfa2002d, 0x8b76ff7a,
4746+
0xff000000, 0xbf06ff76,
4747+
0xcf000000, 0xbfa2003f,
4748+
0x8b79ff7a, 0xffff0000,
4749+
0xbf06ff79, 0xcc330000,
4750+
0xbfa2003d, 0xbf06ff79,
4751+
0xcc880000, 0xbfa2003a,
4752+
0xbf06ff79, 0xcc350000,
4753+
0xbfa2003a, 0xbf06ff79,
4754+
0xcc3a0000, 0xbfa20037,
4755+
0xbf06ff76, 0xcc000000,
4756+
0xbfa10034, 0x8b76ff7b,
4757+
0x000001ff, 0xbf06ff76,
4758+
0x000000ff, 0xbfa20029,
47274759
0xbf06ff76, 0x000000fa,
4728-
0xbfa20054, 0x81f9ff76,
4729-
0x000000e9, 0xbf0b8179,
4730-
0xbfa20050, 0x8b76ff7b,
4731-
0xffff0000, 0xbf06ff76,
4732-
0xbf860000, 0xbfa10051,
4733-
0x9376ff7b, 0x0002000e,
4734-
0x8b79ff7b, 0x00003f00,
4735-
0x85798679, 0x8c767976,
4736-
0xb9763b01, 0xbfa00049,
4737-
0x8b76ff7a, 0xfc000000,
4738-
0xbf06ff76, 0xd4000000,
4739-
0xbfa20013, 0xbf06ff76,
4740-
0xc8000000, 0xbfa20027,
4741-
0x8b76ff7a, 0xff000000,
4742-
0xbf06ff76, 0xcf000000,
4743-
0xbfa20039, 0x8b79ff7a,
4744-
0xffff0000, 0xbf06ff79,
4745-
0xcc350000, 0xbfa20037,
4746-
0xbf06ff79, 0xcc3a0000,
4747-
0xbfa20034, 0xbf06ff76,
4748-
0xcc000000, 0xbfa10031,
4749-
0x8b76ff7b, 0x000001ff,
4750-
0xbf06ff76, 0x000000ff,
4751-
0xbfa20029, 0xbf06ff76,
4752-
0x000000fa, 0xbfa20026,
4753-
0x81f6ff76, 0x000000e9,
4754-
0xbf0b8176, 0xbfa20022,
4755-
0x8b76ff7b, 0x0003fe00,
4756-
0xbf06ff76, 0x0001fe00,
4757-
0xbfa2001d, 0x8b76ff7b,
4758-
0x07fc0000, 0xbf06ff76,
4759-
0x03fc0000, 0xbfa20018,
4760-
0xbfa00014, 0x9376ff7a,
4761-
0x00040016, 0x81f68176,
4762-
0xbf0b8176, 0xbfa20012,
4763-
0x9376ff7a, 0x00050011,
4760+
0xbfa20026, 0x81f6ff76,
4761+
0x000000e9, 0xbf0b8176,
4762+
0xbfa20022, 0x8b76ff7b,
4763+
0x0003fe00, 0xbf06ff76,
4764+
0x0001fe00, 0xbfa2001d,
4765+
0x8b76ff7b, 0x07fc0000,
4766+
0xbf06ff76, 0x03fc0000,
4767+
0xbfa20018, 0xbfa00014,
4768+
0x9376ff7a, 0x00040016,
47644769
0x81f68176, 0xbf0b8176,
4765-
0xbfa2000d, 0x8b76ff7a,
4766-
0x000001ff, 0xbf06ff76,
4767-
0x000000ff, 0xbfa20008,
4768-
0x8b76ff7b, 0x000001ff,
4770+
0xbfa20012, 0x9376ff7a,
4771+
0x00050011, 0x81f68176,
4772+
0xbf0b8176, 0xbfa2000d,
4773+
0x8b76ff7a, 0x000001ff,
47694774
0xbf06ff76, 0x000000ff,
4770-
0xbfa20003, 0xbfc70000,
4771-
0xbefb006e, 0xbfa0ffad,
4772-
0xbfc70000, 0xbefb006f,
4773-
0xbfa0ffaa, 0xbfc70000,
4775+
0xbfa20008, 0x8b76ff7b,
4776+
0x000001ff, 0xbf06ff76,
4777+
0x000000ff, 0xbfa20003,
4778+
0xbfc70000, 0xbefb006e,
4779+
0xbfa0ffa7, 0xbfc70000,
4780+
0xbefb006f, 0xbfa0ffa4,
4781+
0x80ec886c, 0x82ed806d,
4782+
0xbfa0fff7, 0xbfc70000,
47744783
0xbeee007e, 0xbeef007f,
47754784
0xbefe0180, 0xbefe4d84,
47764785
0xbf8a0000, 0x8b7aff7f,

drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
#define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
3838
#define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
3939
#define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12)
40+
#define HAVE_INSTRUCTION_FIXUP (ASIC_FAMILY == CHIP_GC_12_0_3)
4041

4142
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
4243
#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
@@ -375,9 +376,9 @@ L_TRAP_CASE:
375376
L_EXIT_TRAP:
376377
s_and_b32 ttmp1, ttmp1, ADDRESS_HI32_MASK
377378

378-
#if HAVE_BANKED_VGPRS
379+
#if HAVE_INSTRUCTION_FIXUP
379380
s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
380-
fixup_vgpr_bank_selection()
381+
fixup_instruction()
381382
#endif
382383

383384
#if HAVE_XNACK
@@ -418,8 +419,8 @@ L_HAVE_VGPRS:
418419
save_and_clear_xnack_state_priv(s_save_tmp)
419420
#endif
420421

421-
#if HAVE_BANKED_VGPRS
422-
fixup_vgpr_bank_selection()
422+
#if HAVE_INSTRUCTION_FIXUP
423+
fixup_instruction()
423424
#endif
424425

425426
/* inform SPI the readiness and wait for SPI's go signal */
@@ -1400,8 +1401,8 @@ L_BARRIER_RESTORE_LOOP:
14001401
L_BARRIER_RESTORE_DONE:
14011402
end
14021403

1403-
#if HAVE_BANKED_VGPRS
1404-
function fixup_vgpr_bank_selection
1404+
#if HAVE_INSTRUCTION_FIXUP
1405+
function fixup_instruction
14051406
// PC read may fault if memory violation has been asserted.
14061407
// In this case no further progress is expected so fixup is not needed.
14071408
s_bitcmp1_b32 s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_SHIFT
@@ -1480,8 +1481,13 @@ L_FIXUP_NOT_VOP12C:
14801481
s_cmp_eq_u32 ttmp10, 0xcf000000 // If 31:24 = 0xcf, this is VOPD3
14811482
s_cbranch_scc1 L_FIXUP_THREE_DWORD // If VOPD3, 3 DWORD inst
14821483
// Not VOP1, VOP2, VOPC, VOP3, VOP3SD, VOPD, or VOPD3.
1483-
// Might be in VOP3P, but we must ensure we are not VOP3PX2
1484+
// Check if we are in the middle of VOP3PX.
14841485
s_and_b32 ttmp13, ttmp14, 0xffff0000 // Bits 31:16
1486+
s_cmp_eq_u32 ttmp13, 0xcc330000 // If 31:16 = 0xcc33, this is 8 bytes past VOP3PX
1487+
s_cbranch_scc1 L_FIXUP_VOP3PX_MIDDLE
1488+
s_cmp_eq_u32 ttmp13, 0xcc880000 // If 31:16 = 0xcc88, this is 8 bytes past VOP3PX
1489+
s_cbranch_scc1 L_FIXUP_VOP3PX_MIDDLE
1490+
// Might be in VOP3P, but we must ensure we are not VOP3PX2
14851491
s_cmp_eq_u32 ttmp13, 0xcc350000 // If 31:16 = 0xcc35, this is VOP3PX2
14861492
s_cbranch_scc1 L_FIXUP_DONE // If VOP3PX2, no fixup needed
14871493
s_cmp_eq_u32 ttmp13, 0xcc3a0000 // If 31:16 = 0xcc3a, this is VOP3PX2
@@ -1542,6 +1548,11 @@ L_FIXUP_THREE_DWORD:
15421548
s_mov_b32 ttmp15, ttmp3 // Move possible S_SET_VGPR_MSB into ttmp15
15431549
s_branch L_FIXUP_ONE_DWORD // Go to common logic that checks if it is S_SET_VGPR_MSB
15441550

1551+
L_FIXUP_VOP3PX_MIDDLE:
1552+
s_sub_co_u32 ttmp0, ttmp0, 8 // Rewind PC 8 bytes to beginning of instruction
1553+
s_sub_co_ci_u32 ttmp1, ttmp1, 0
1554+
s_branch L_FIXUP_TWO_DWORD // 2 DWORD inst (2nd half of a 4 DWORD inst)
1555+
15451556
L_FIXUP_DONE:
15461557
s_wait_kmcnt 0 // Ensure load of ttmp2 and ttmp3 is done
15471558
end

0 commit comments

Comments
 (0)