Skip to content

Commit 4c4ef39

Browse files
authored
aarch64: Make csdb in JTSequence conditional (#12798)
Make the emission of the `csdb` instruction in `JTSequence` conditional on `enable_table_access_spectre_mitigation`, which is the closest equivalent among the preexisting Cranelift settings. #12789 shows that this instruction has a very large performance impact, at least on macOS aarch64, and there is currently no way to turn it off, even for testing. The goal of this commit is to leave the defaults untouched while providing an escape hatch. Notably, this doesn't fix #12789: the option is obscure enough that I'd personally say the real thing to resolve is the default behavior, not the availability of knobs.
1 parent bb5a845 commit 4c4ef39

2 files changed

Lines changed: 83 additions & 2 deletions

File tree

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3186,8 +3186,11 @@ impl MachInstEmit for Inst {
31863186
rm: ridx,
31873187
};
31883188
inst.emit(sink, emit_info, state);
3189-
// Prevent any data value speculation.
3190-
Inst::Csdb.emit(sink, emit_info, state);
3189+
// Prevent any data value speculation if spectre mitigations are
3190+
// enabled.
3191+
if emit_info.0.enable_table_access_spectre_mitigation() {
3192+
Inst::Csdb.emit(sink, emit_info, state);
3193+
}
31913194

31923195
// Load address of jump table
31933196
let inst = Inst::Adr { rd: rtmp1, off: 16 };
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
test compile precise-output
2+
set unwind_info=false
3+
set enable_table_access_spectre_mitigation=false
4+
target aarch64
5+
6+
function %f(i32) -> i32 {
7+
block0(v0: i32):
8+
br_table v0, block4, [block1, block2, block3]
9+
10+
block1:
11+
v1 = iconst.i32 1
12+
jump block5(v1)
13+
14+
block2:
15+
v2 = iconst.i32 2
16+
jump block5(v2)
17+
18+
block3:
19+
v3 = iconst.i32 3
20+
jump block5(v3)
21+
22+
block4:
23+
v4 = iconst.i32 4
24+
jump block5(v4)
25+
26+
block5(v5: i32):
27+
v6 = iadd.i32 v0, v5
28+
return v6
29+
}
30+
31+
; VCode:
32+
; block0:
33+
; emit_island 44
34+
; subs wzr, w0, #3
35+
; b.hs label4 ; csel x11, xzr, x0, hs ; csdb ; adr x10, pc+16 ; ldrsw x11, [x10, x11, uxtw #2] ; add x10, x10, x11 ; br x10 ; jt_entries [MachLabel(3), MachLabel(2), MachLabel(1)]
36+
; block1:
37+
; movz w5, #3
38+
; b label5
39+
; block2:
40+
; movz w5, #2
41+
; b label5
42+
; block3:
43+
; movz w5, #1
44+
; b label5
45+
; block4:
46+
; movz w5, #4
47+
; b label5
48+
; block5:
49+
; add w0, w0, w5
50+
; ret
51+
;
52+
; Disassembled:
53+
; block0: ; offset 0x0
54+
; cmp w0, #3
55+
; b.hs #0x40
56+
; csel x11, xzr, x0, hs
57+
; adr x10, #0x1c
58+
; ldrsw x11, [x10, w11, uxtw #2]
59+
; add x10, x10, x11
60+
; br x10
61+
; .byte 0x1c, 0x00, 0x00, 0x00
62+
; .byte 0x14, 0x00, 0x00, 0x00
63+
; .byte 0x0c, 0x00, 0x00, 0x00
64+
; block1: ; offset 0x28
65+
; mov w5, #3
66+
; b #0x44
67+
; block2: ; offset 0x30
68+
; mov w5, #2
69+
; b #0x44
70+
; block3: ; offset 0x38
71+
; mov w5, #1
72+
; b #0x44
73+
; block4: ; offset 0x40
74+
; mov w5, #4
75+
; block5: ; offset 0x44
76+
; add w0, w0, w5
77+
; ret
78+

0 commit comments

Comments
 (0)