Skip to content

Commit ed55592

Browse files
authored
Properly verify alignment in string transcoding (#13012)
This commit updates string transcoding between guest modules to properly verify alignment. Previously alignment was only verified on the first allocation, not reallocations, which is not spec-compliant. This additionally fixes a possible host panic when dealing with unaligned pointers.
1 parent f89d910 commit ed55592

2 files changed

Lines changed: 291 additions & 0 deletions

File tree

crates/environ/src/fact/trampoline.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2128,6 +2128,7 @@ impl<'a, 'b> Compiler<'a, 'b> {
21282128
}
21292129
}));
21302130
self.instruction(LocalSet(dst.ptr.idx));
2131+
self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2);
21312132
self.instruction(End); // end of shrink-to-fit
21322133

21332134
self.free_temp_local(dst_byte_len);
@@ -2222,6 +2223,7 @@ impl<'a, 'b> Compiler<'a, 'b> {
22222223
self.instruction(LocalGet(dst.len.idx)); // new_size
22232224
self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32()));
22242225
self.instruction(LocalSet(dst.ptr.idx));
2226+
self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2);
22252227

22262228
self.free_temp_local(dst_byte_len);
22272229
self.free_temp_local(src_byte_len);
@@ -2303,6 +2305,7 @@ impl<'a, 'b> Compiler<'a, 'b> {
23032305
self.instruction(LocalGet(dst.len.idx)); // new_size
23042306
self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32()));
23052307
self.instruction(LocalSet(dst.ptr.idx));
2308+
self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2);
23062309
self.instruction(End);
23072310

23082311
// In this block the latin1 encoding failed. The host transcode
@@ -2328,6 +2331,7 @@ impl<'a, 'b> Compiler<'a, 'b> {
23282331
self.instruction(LocalTee(dst_byte_len.idx));
23292332
self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32()));
23302333
self.instruction(LocalSet(dst.ptr.idx));
2334+
self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2);
23312335

23322336
// Call the host utf16 transcoding function. This will inflate the
23332337
// prior latin1 bytes and then encode the rest of the source string
@@ -2367,6 +2371,7 @@ impl<'a, 'b> Compiler<'a, 'b> {
23672371
self.ptr_shl(dst_mem_opts);
23682372
self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32()));
23692373
self.instruction(LocalSet(dst.ptr.idx));
2374+
self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2);
23702375
self.instruction(End);
23712376

23722377
// Tag the returned pointer as utf16

tests/misc_testsuite/component-model/strings.wast

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
;;! multi_memory = true
2+
13
;; Returning an unaligned utf16 string is invalid
24
(component definition $A
35
(core module $m
@@ -19,3 +21,287 @@
1921
(assert_trap (invoke "f1") "string pointer not aligned to 2")
2022
(component instance $A $A)
2123
(assert_trap (invoke "f2") "string pointer not aligned to 2")
24+
25+
;; utf8 -> utf16 -- when shrinking memory it must be aligned
26+
(component
27+
(component $c
28+
(core module $m
29+
(func (export "") (param i32 i32) unreachable)
30+
(func (export "realloc") (param $old_ptr i32) (param $old_size i32)
31+
(param $align i32) (param $new_size i32) (result i32)
32+
(if (i32.ne (local.get $align) (i32.const 2)) (then unreachable))
33+
(if (result i32) (i32.eqz (local.get $old_ptr))
34+
(then (i32.const 2)) ;; first allocation aligned
35+
(else (i32.const 3)) ;; second allocation unaligned
36+
)
37+
)
38+
(memory (export "memory") 1)
39+
)
40+
(core instance $m (instantiate $m))
41+
(func (export "a") (param "a" string)
42+
(canon lift
43+
(core func $m "")
44+
(realloc (func $m "realloc"))
45+
(memory $m "memory")
46+
string-encoding=utf16)
47+
)
48+
)
49+
50+
(component $c2
51+
(import "a" (func $f (param "a" string)))
52+
(core module $libc
53+
(memory (export "memory") 1)
54+
;; "àà" is 2 UTF-16 code units (4 bytes), and 4 bytes in UTF-8
55+
;; Pessimistic alloc = 4 * 2 = 8 bytes, shrinks to 4 bytes after.
56+
(data (memory 0) (i32.const 0) "àà")
57+
)
58+
(core instance $libc (instantiate $libc))
59+
(core func $f (canon lower (func $f) string-encoding=utf8 (memory $libc "memory")))
60+
(core module $m
61+
(import "" "" (func $f (param i32 i32)))
62+
(func (export "f") (call $f (i32.const 0) (i32.const 4)))
63+
)
64+
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
65+
(func (export "f") (canon lift (core func $m "f")))
66+
)
67+
68+
(instance $c (instantiate $c))
69+
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
70+
(export "f" (func $c2 "f"))
71+
)
72+
73+
(assert_trap (invoke "f") "unaligned pointer")
74+
75+
;; utf16 -> latin1+utf16 -- when growing memory it must be aligned
76+
(component
77+
(component $c
78+
(core module $m
79+
(func (export "") (param i32 i32))
80+
(func (export "realloc") (param $old_ptr i32) (param $old_size i32)
81+
(param $align i32) (param $new_size i32) (result i32)
82+
(if (i32.ne (local.get $align) (i32.const 2)) (then unreachable))
83+
(if (result i32) (i32.eqz (local.get $old_ptr))
84+
(then (i32.const 2)) ;; first allocation aligned
85+
(else (i32.const 3)) ;; second allocation unaligned
86+
)
87+
)
88+
(memory (export "memory") 1)
89+
)
90+
(core instance $m (instantiate $m))
91+
(func (export "a") (param "a" string)
92+
(canon lift
93+
(core func $m "")
94+
(realloc (func $m "realloc"))
95+
(memory $m "memory")
96+
string-encoding=latin1+utf16)
97+
)
98+
)
99+
100+
(component $c2
101+
(import "a" (func $f (param "a" string)))
102+
(core module $libc
103+
(memory (export "memory") 1)
104+
;; "AΣ" in UTF-16: 0x41 0x00 0xA3 0x03 (Σ = U+03A3, not Latin-1)
105+
;; Forces transcoding to take the UTF-16 grow path.
106+
(data (memory 0) (i32.const 0) "\41\00\a3\03")
107+
)
108+
(core instance $libc (instantiate $libc))
109+
(core func $f (canon lower (func $f) string-encoding=utf16 (memory $libc "memory")))
110+
(core module $m
111+
(import "" "" (func $f (param i32 i32)))
112+
(func (export "f") (call $f (i32.const 0) (i32.const 2)))
113+
)
114+
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
115+
(func (export "f") (canon lift (core func $m "f")))
116+
)
117+
118+
(instance $c (instantiate $c))
119+
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
120+
(export "f" (func $c2 "f"))
121+
)
122+
(assert_trap (invoke "f") "unaligned pointer")
123+
124+
;; latin1+utf16 -> latin1+utf16 -- auto-downsize
125+
(component
126+
(component $c
127+
(core module $m
128+
(func (export "") (param i32 i32) unreachable)
129+
(func (export "realloc") (param $old_ptr i32) (param $old_size i32)
130+
(param $align i32) (param $new_size i32) (result i32)
131+
(if (i32.ne (local.get $align) (i32.const 2)) (then unreachable))
132+
(if (result i32) (i32.eqz (local.get $old_ptr))
133+
(then (i32.const 2)) ;; first allocation aligned
134+
(else (i32.const 3)) ;; second allocation unaligned
135+
)
136+
)
137+
(memory (export "memory") 1)
138+
)
139+
(core instance $m (instantiate $m))
140+
(func (export "a") (param "a" string)
141+
(canon lift
142+
(core func $m "")
143+
(realloc (func $m "realloc"))
144+
(memory $m "memory")
145+
string-encoding=latin1+utf16)
146+
)
147+
)
148+
149+
(component $c2
150+
(import "a" (func $f (param "a" string)))
151+
(core module $libc
152+
(memory (export "memory") 1)
153+
;; "AA" in UTF-16: 0x41 0x00 0x41 0x00
154+
(data (memory 0) (i32.const 0) "\41\00\41\00")
155+
)
156+
(core instance $libc (instantiate $libc))
157+
(core func $f (canon lower (func $f) string-encoding=latin1+utf16 (memory $libc "memory")))
158+
(core module $m
159+
(import "" "" (func $f (param i32 i32)))
160+
;; the length here contains `UTF16_TAG` and it's additionally 2 code
161+
;; units. This is a utf-16 encoded string but during transcoding it'll
162+
;; get shrunk to latin 1
163+
(func (export "f") (call $f (i32.const 0) (i32.const 0x8000_0002)))
164+
)
165+
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
166+
(func (export "f") (canon lift (core func $m "f")))
167+
)
168+
169+
(instance $c (instantiate $c))
170+
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
171+
(export "f" (func $c2 "f"))
172+
)
173+
(assert_trap (invoke "f") "unaligned pointer")
174+
175+
;; utf8 -> latin1+utf16 -- initial encode finishes but needs downsizing
176+
(component
177+
(component $c
178+
(core module $m
179+
(func (export "") (param i32 i32) unreachable)
180+
(func (export "realloc") (param $old_ptr i32) (param $old_size i32)
181+
(param $align i32) (param $new_size i32) (result i32)
182+
(if (i32.ne (local.get $align) (i32.const 2)) (then unreachable))
183+
(if (result i32) (i32.eqz (local.get $old_ptr))
184+
(then (i32.const 2)) ;; first allocation aligned
185+
(else (i32.const 3)) ;; second allocation unaligned
186+
)
187+
)
188+
(memory (export "memory") 1)
189+
)
190+
(core instance $m (instantiate $m))
191+
(func (export "a") (param "a" string)
192+
(canon lift
193+
(core func $m "")
194+
(realloc (func $m "realloc"))
195+
(memory $m "memory")
196+
string-encoding=latin1+utf16)
197+
)
198+
)
199+
200+
(component $c2
201+
(import "a" (func $f (param "a" string)))
202+
(core module $libc
203+
(memory (export "memory") 1)
204+
;; "Ë" in UTF-8 is "\xc3\x8b", which is 2 bytes, but in latin1+utf16 it's
205+
;; 1 byte (0xCB). The initial allocation of 2 bytes completes the entire
206+
;; transcode but the final allocation needs to be shrunk to 1 byte.
207+
(data (memory 0) (i32.const 0) "Ë")
208+
)
209+
(core instance $libc (instantiate $libc))
210+
(core func $f (canon lower (func $f) (memory $libc "memory")))
211+
(core module $m
212+
(import "" "" (func $f (param i32 i32)))
213+
(func (export "f") (call $f (i32.const 0) (i32.const 2)))
214+
)
215+
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
216+
(func (export "f") (canon lift (core func $m "f")))
217+
)
218+
219+
(instance $c (instantiate $c))
220+
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
221+
(export "f" (func $c2 "f"))
222+
)
223+
(assert_trap (invoke "f") "unaligned pointer")
224+
225+
;; utf8 -> latin1+utf16
226+
;; - first realloc fails to hold latin1
227+
;; - second realloc is too big
228+
;; - third realloc shrinks
229+
(component
230+
(component $c
231+
(core module $m
232+
(global $cnt (mut i32) (i32.const 0))
233+
(func (export "") (param i32 i32)
234+
unreachable
235+
)
236+
(func (export "realloc") (param $old_ptr i32) (param $old_size i32)
237+
(param $align i32) (param $new_size i32) (result i32)
238+
(if (i32.ne (local.get $align) (i32.const 2)) (then unreachable))
239+
(global.set $cnt (i32.add (global.get $cnt) (i32.const 1)))
240+
241+
;; first allocation is aligned
242+
(if (i32.eq (global.get $cnt) (i32.const 1))
243+
(then
244+
(if (i32.ne (local.get $old_ptr) (i32.const 0)) (then unreachable))
245+
(if (i32.ne (local.get $old_size) (i32.const 0)) (then unreachable))
246+
(if (i32.ne (local.get $new_size) (i32.const 5)) (then unreachable))
247+
(return (i32.const 2)))
248+
)
249+
;; second allocation is aligned
250+
(if (i32.eq (global.get $cnt) (i32.const 2))
251+
(then
252+
(if (i32.ne (local.get $old_ptr) (i32.const 2)) (then unreachable))
253+
(if (i32.ne (local.get $old_size) (i32.const 5)) (then unreachable))
254+
(if (i32.ne (local.get $new_size) (i32.const 10)) (then unreachable))
255+
(return (i32.const 4)))
256+
)
257+
;; third allocation is unaligned
258+
(if (i32.eq (global.get $cnt) (i32.const 3))
259+
(then
260+
(if (i32.ne (local.get $old_ptr) (i32.const 4)) (then unreachable))
261+
(if (i32.ne (local.get $old_size) (i32.const 10)) (then unreachable))
262+
(if (i32.ne (local.get $new_size) (i32.const 4)) (then unreachable))
263+
(return (i32.const 3)))
264+
)
265+
266+
unreachable
267+
)
268+
(memory (export "memory") 1)
269+
)
270+
(core instance $m (instantiate $m))
271+
(func (export "a") (param "a" string)
272+
(canon lift
273+
(core func $m "")
274+
(realloc (func $m "realloc"))
275+
(memory $m "memory")
276+
string-encoding=latin1+utf16)
277+
)
278+
)
279+
280+
(component $c2
281+
(import "a" (func $f (param "a" string)))
282+
(core module $libc
283+
(memory (export "memory") 1)
284+
;; "Ë┛" in UTF-8 is "\xc3\x8b\xe2\x94\x9b", 5 bytes.
285+
;; * First, a 5-byte allocation is made to see if it fits in latin 1.
286+
;; * This fails since "┛" does not fit in latin1. The second allocation
287+
;; is over-large at 10 bytes (twice the original length).
288+
;; * The string encoded in UTF-16 is "\xcb\x00\x1b%", which is 4 bytes.
289+
;; * The 10-byte allocation is shrunk to 4 bytes, which is what this
290+
;; test is looking for (proper alignment in the 3rd realloc).
291+
(data (memory 0) (i32.const 0) "Ë┛")
292+
)
293+
(core instance $libc (instantiate $libc))
294+
(core func $f (canon lower (func $f) (memory $libc "memory")))
295+
(core module $m
296+
(import "" "" (func $f (param i32 i32)))
297+
(func (export "f") (call $f (i32.const 0) (i32.const 5)))
298+
)
299+
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
300+
(func (export "f") (canon lift (core func $m "f")))
301+
)
302+
303+
(instance $c (instantiate $c))
304+
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
305+
(export "f" (func $c2 "f"))
306+
)
307+
(assert_trap (invoke "f") "unaligned pointer")

0 commit comments

Comments
 (0)