aarch64: Make csdb in JTSequence conditional (#12798)

alexcrichton · web-flow · commit 4c4ef3958f39 · 2026-03-18T09:33:08.000Z
Make the emission of this instruction conditional on `enable_table_access_spectre_mitigation`, which is the closest equivalent among preexisting Cranelift settings. #12789 shows that this instruction has a very large performance impact, at least on macOS aarch64, and there is currently no way to turn it off, even for testing. The goal of this commit is to leave the defaults untouched while providing an escape hatch. Notably, this doesn't fix #12789: the option is obscure enough that I'd personally say the real thing to resolve is the default behavior, not to add knobs.
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -3186,8 +3186,11 @@ impl MachInstEmit for Inst {
                     rm: ridx,
                 };
                 inst.emit(sink, emit_info, state);
-                // Prevent any data value speculation.
-                Inst::Csdb.emit(sink, emit_info, state);
+                // Prevent any data value speculation if spectre mitigations are
+                // enabled.
+                if emit_info.0.enable_table_access_spectre_mitigation() {
+                    Inst::Csdb.emit(sink, emit_info, state);
+                }
 
                 // Load address of jump table
                 let inst = Inst::Adr { rd: rtmp1, off: 16 };
diff --git a/cranelift/filetests/filetests/isa/aarch64/jumptable-no-spectre.clif b/cranelift/filetests/filetests/isa/aarch64/jumptable-no-spectre.clif
@@ -0,0 +1,78 @@
+test compile precise-output
+set unwind_info=false
+set enable_table_access_spectre_mitigation=false
+target aarch64
+
+function %f(i32) -> i32 {
+block0(v0: i32):
+  br_table v0, block4, [block1, block2, block3]
+
+block1:
+  v1 = iconst.i32 1
+  jump block5(v1)
+
+block2:
+  v2 = iconst.i32 2
+  jump block5(v2)
+
+block3:
+  v3 = iconst.i32 3
+  jump block5(v3)
+
+block4:
+  v4 = iconst.i32 4
+  jump block5(v4)
+
+block5(v5: i32):
+  v6 = iadd.i32 v0, v5
+  return v6
+}
+
+; VCode:
+; block0:
+;   emit_island 44
+;   subs wzr, w0, #3
+;   b.hs label4 ; csel x11, xzr, x0, hs ; csdb ; adr x10, pc+16 ; ldrsw x11, [x10, x11, uxtw #2] ; add x10, x10, x11 ; br x10 ; jt_entries [MachLabel(3), MachLabel(2), MachLabel(1)]
+; block1:
+;   movz w5, #3
+;   b label5
+; block2:
+;   movz w5, #2
+;   b label5
+; block3:
+;   movz w5, #1
+;   b label5
+; block4:
+;   movz w5, #4
+;   b label5
+; block5:
+;   add w0, w0, w5
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   cmp w0, #3
+;   b.hs #0x40
+;   csel x11, xzr, x0, hs
+;   adr x10, #0x1c
+;   ldrsw x11, [x10, w11, uxtw #2]
+;   add x10, x10, x11
+;   br x10
+;   .byte 0x1c, 0x00, 0x00, 0x00
+;   .byte 0x14, 0x00, 0x00, 0x00
+;   .byte 0x0c, 0x00, 0x00, 0x00
+; block1: ; offset 0x28
+;   mov w5, #3
+;   b #0x44
+; block2: ; offset 0x30
+;   mov w5, #2
+;   b #0x44
+; block3: ; offset 0x38
+;   mov w5, #1
+;   b #0x44
+; block4: ; offset 0x40
+;   mov w5, #4
+; block5: ; offset 0x44
+;   add w0, w0, w5
+;   ret
+