Skip to content

Commit 3c714e3

Browse files
authored
riscv64: Fix replicated_imm5 lowering rule (#12956)
* riscv64: Fix `replicated_imm5` lowering rule

  This commit fixes the `replicated_imm5` lowering rule in the riscv64 backend when applied to `vconst` inputs. This is reachable in WebAssembly via `v128.const`, for example, or via Cranelift optimizations. The previous iteration of the rule did not take lane width into account and always reduced the immediate as far as possible, whereas the reduction needs to stop once the lane size has been reached. For example, a constant with 32-bit lanes each equal to `0x01010101` was reduced all the way down to the byte `0x01` and then treated as the immediate 1. The fix here is to refactor the definition into one rule per lane width, each of which handles the input constant.

  This is accompanied by some minor refactorings around the available ISLE rules and such, to handle more bit widths in more locations, have more faithful signatures, etc.

* Fix winch testing

* Fix test exemption

prtest:full
1 parent a4fef42 commit 3c714e3

7 files changed

Lines changed: 383 additions & 19 deletions

File tree

cranelift/codegen/src/isa/aarch64/inst.isle

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4112,13 +4112,13 @@
41124112
;; as it will make it easier to create the immediates in the instructions below.
41134113
(rule 5 (splat_const (u64_replicated_u32 n) (VectorSize.Size64x2))
41144114
(splat_const n (VectorSize.Size32x4)))
4115-
(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x4))
4115+
(rule 5 (splat_const (u32_from_u64 (u32_replicated_u16 n)) (VectorSize.Size32x4))
41164116
(splat_const n (VectorSize.Size16x8)))
4117-
(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x2))
4117+
(rule 5 (splat_const (u32_from_u64 (u32_replicated_u16 n)) (VectorSize.Size32x2))
41184118
(splat_const n (VectorSize.Size16x4)))
4119-
(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x8))
4119+
(rule 5 (splat_const (u16_from_u64 (u16_replicated_u8 n)) (VectorSize.Size16x8))
41204120
(splat_const n (VectorSize.Size8x16)))
4121-
(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x4))
4121+
(rule 5 (splat_const (u16_from_u64 (u16_replicated_u8 n)) (VectorSize.Size16x4))
41224122
(splat_const n (VectorSize.Size8x8)))
41234123

41244124
;; Special cases for `vec_dup_imm` instructions where the input is either

cranelift/codegen/src/isa/riscv64/inst.isle

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2020,9 +2020,15 @@
20202020
(decl imm5_from_i64 (Imm5) i64)
20212021
(extern extractor imm5_from_i64 imm5_from_i64)
20222022

2023-
;; Construct a Imm5 from an i8
2023+
;; Construct a Imm5 from an iNN
20242024
(decl pure partial i8_to_imm5 (i8) Imm5)
20252025
(extern constructor i8_to_imm5 i8_to_imm5)
2026+
(decl pure partial i16_to_imm5 (i16) Imm5)
2027+
(rule (i16_to_imm5 (i8_from_i16 n)) (i8_to_imm5 n))
2028+
(decl pure partial i32_to_imm5 (i32) Imm5)
2029+
(rule (i32_to_imm5 (i8_from_i32 n)) (i8_to_imm5 n))
2030+
(decl pure partial i64_to_imm5 (i64) Imm5)
2031+
(rule (i64_to_imm5 (i8_from_i64 n)) (i8_to_imm5 n))
20262032

20272033
;; Helper to go directly from a `Value` to an `Imm5`.
20282034
(decl imm5_from_value (Imm5) Value)
@@ -2037,11 +2043,20 @@
20372043
;; Constructor that matches a `Value` equivalent to a replicated Imm5 on all lanes.
20382044
(decl pure partial replicated_imm5 (Value) Imm5)
20392045
(rule (replicated_imm5 (splat _ (imm5_from_value n))) n)
2040-
(rule (replicated_imm5 (vconst _ (u128_from_constant n128)))
2046+
(rule (replicated_imm5 (vconst (multi_lane 64 _) (u128_from_constant n128)))
20412047
(if-let (u128_replicated_u64 n64) n128)
2042-
(if-let (u64_replicated_u32 n32) n64)
2043-
(if-let (u32_replicated_u16 n16) n32)
2044-
(if-let (u16_replicated_u8 n8) n16)
2048+
(if-let n (i64_to_imm5 (u64_cast_signed n64)))
2049+
n)
2050+
(rule (replicated_imm5 (vconst (multi_lane 32 _) (u128_from_constant n128)))
2051+
(if-let (u128_replicated_u32 n32) n128)
2052+
(if-let n (i32_to_imm5 (u32_cast_signed n32)))
2053+
n)
2054+
(rule (replicated_imm5 (vconst (multi_lane 16 _) (u128_from_constant n128)))
2055+
(if-let (u128_replicated_u16 n16) n128)
2056+
(if-let n (i16_to_imm5 (u16_cast_signed n16)))
2057+
n)
2058+
(rule (replicated_imm5 (vconst (multi_lane 8 _) (u128_from_constant n128)))
2059+
(if-let (u128_replicated_u8 n8) n128)
20452060
(if-let n (i8_to_imm5 (u8_cast_signed n8)))
20462061
n)
20472062

cranelift/codegen/src/isle_prelude.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -792,27 +792,25 @@ macro_rules! isle_common_prelude_methods {
792792
}
793793
}
794794

795-
fn u64_replicated_u32(&mut self, val: u64) -> Option<u64> {
795+
fn u64_replicated_u32(&mut self, val: u64) -> Option<u32> {
796796
let low32 = val as u32 as u64;
797797
if (low32 | (low32 << 32)) == val {
798-
Some(low32)
798+
Some(val as u32)
799799
} else {
800800
None
801801
}
802802
}
803803

804-
fn u32_replicated_u16(&mut self, val: u64) -> Option<u64> {
805-
let val = val as u32;
804+
fn u32_replicated_u16(&mut self, val: u32) -> Option<u16> {
806805
let low16 = val as u16 as u32;
807806
if (low16 | (low16 << 16)) == val {
808-
Some(low16.into())
807+
Some(low16 as u16)
809808
} else {
810809
None
811810
}
812811
}
813812

814-
fn u16_replicated_u8(&mut self, val: u64) -> Option<u8> {
815-
let val = val as u16;
813+
fn u16_replicated_u8(&mut self, val: u16) -> Option<u8> {
816814
let low8 = val as u8 as u16;
817815
if (low8 | (low8 << 8)) == val {
818816
Some(low8 as u8)

cranelift/codegen/src/prelude.isle

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,21 @@
115115
;; lower half of the input
116116
(decl u128_replicated_u64 (u64) u128)
117117
(extern extractor u128_replicated_u64 u128_replicated_u64)
118-
(decl u64_replicated_u32 (u64) u64)
118+
(decl u64_replicated_u32 (u32) u64)
119119
(extern extractor u64_replicated_u32 u64_replicated_u32)
120-
(decl u32_replicated_u16 (u64) u64)
120+
(decl u32_replicated_u16 (u16) u32)
121121
(extern extractor u32_replicated_u16 u32_replicated_u16)
122-
(decl u16_replicated_u8 (u8) u64)
122+
(decl u16_replicated_u8 (u8) u16)
123123
(extern extractor u16_replicated_u8 u16_replicated_u8)
124124

125+
;; Convenience extractors to go directly from u128 to something smaller
126+
(decl u128_replicated_u32 (u32) u128)
127+
(extractor (u128_replicated_u32 n) (u128_replicated_u64 (u64_replicated_u32 n)))
128+
(decl u128_replicated_u16 (u16) u128)
129+
(extractor (u128_replicated_u16 n) (u128_replicated_u32 (u32_replicated_u16 n)))
130+
(decl u128_replicated_u8 (u8) u128)
131+
(extractor (u128_replicated_u8 n) (u128_replicated_u16 (u16_replicated_u8 n)))
132+
125133
;; Get the low and high bits of a `u128` as `u64`s.
126134
(decl u128_low_bits (u128) u64)
127135
(extern constructor u128_low_bits u128_low_bits)

0 commit comments

Comments
 (0)