@@ -181,13 +181,13 @@ SYM_FUNC_END(__pmull_p8_16x64)
181181
182182 pmull16x64_\p fold_consts , \reg1 , v8
183183
184- CPU_LE( rev64 v11.16b, v11.16b )
185- CPU_LE( rev64 v12.16b, v12.16b )
184+ rev64 v11.16b , v11.16b
185+ rev64 v12.16b , v12.16b
186186
187187 pmull16x64_\p fold_consts , \reg2 , v9
188188
189- CPU_LE( ext v11.16b, v11.16b , v11.16b , # 8 )
190- CPU_LE( ext v12.16b, v12.16b , v12.16b , # 8 )
189+ ext v11.16b , v11.16b , v11.16b , # 8
190+ ext v12.16b , v12.16b , v12.16b , # 8
191191
192192 eor \reg1\().16b , \reg1\().16b , v8.16b
193193 eor \reg2\().16b , \reg2\().16b , v9.16b
@@ -220,22 +220,22 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
220220 ldp q4 , q5 , [ buf , # 0x40 ]
221221 ldp q6 , q7 , [ buf , # 0x60 ]
222222 add buf , buf , # 0x80
223- CPU_LE( rev64 v0.16b, v0.16b )
224- CPU_LE( rev64 v1.16b, v1.16b )
225- CPU_LE( rev64 v2.16b, v2.16b )
226- CPU_LE( rev64 v3.16b, v3.16b )
227- CPU_LE( rev64 v4.16b, v4.16b )
228- CPU_LE( rev64 v5.16b, v5.16b )
229- CPU_LE( rev64 v6.16b, v6.16b )
230- CPU_LE( rev64 v7.16b, v7.16b )
231- CPU_LE( ext v0.16b, v0.16b , v0.16b , # 8 )
232- CPU_LE( ext v1.16b, v1.16b , v1.16b , # 8 )
233- CPU_LE( ext v2.16b, v2.16b , v2.16b , # 8 )
234- CPU_LE( ext v3.16b, v3.16b , v3.16b , # 8 )
235- CPU_LE( ext v4.16b, v4.16b , v4.16b , # 8 )
236- CPU_LE( ext v5.16b, v5.16b , v5.16b , # 8 )
237- CPU_LE( ext v6.16b, v6.16b , v6.16b , # 8 )
238- CPU_LE( ext v7.16b, v7.16b , v7.16b , # 8 )
223+ rev64 v0.16b , v0.16b
224+ rev64 v1.16b , v1.16b
225+ rev64 v2.16b , v2.16b
226+ rev64 v3.16b , v3.16b
227+ rev64 v4.16b , v4.16b
228+ rev64 v5.16b , v5.16b
229+ rev64 v6.16b , v6.16b
230+ rev64 v7.16b , v7.16b
231+ ext v0.16b , v0.16b , v0.16b , # 8
232+ ext v1.16b , v1.16b , v1.16b , # 8
233+ ext v2.16b , v2.16b , v2.16b , # 8
234+ ext v3.16b , v3.16b , v3.16b , # 8
235+ ext v4.16b , v4.16b , v4.16b , # 8
236+ ext v5.16b , v5.16b , v5.16b , # 8
237+ ext v6.16b , v6.16b , v6.16b , # 8
238+ ext v7.16b , v7.16b , v7.16b , # 8
239239
240240 // XOR the first 16 data *bits* with the initial CRC value.
241241 movi v8.16b , # 0
@@ -288,8 +288,8 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
288288 pmull16x64_\p fold_consts , v7 , v8
289289 eor v7.16b , v7.16b , v8.16b
290290 ldr q0 , [ buf ], # 16
291- CPU_LE( rev64 v0.16b, v0.16b )
292- CPU_LE( ext v0.16b, v0.16b , v0.16b , # 8 )
291+ rev64 v0.16b , v0.16b
292+ ext v0.16b , v0.16b , v0.16b , # 8
293293 eor v7.16b , v7.16b , v0.16b
294294 subs len , len , # 16
295295 b.ge .Lfold_16_bytes_loop_\@
@@ -310,8 +310,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
310310 // v0 = last 16 original data bytes
311311 add buf , buf , len
312312 ldr q0 , [ buf , # - 16 ]
313- CPU_LE( rev64 v0.16b, v0.16b )
314- CPU_LE( ext v0.16b, v0.16b , v0.16b , # 8 )
313+ rev64 v0.16b , v0.16b
314+ ext v0.16b , v0.16b , v0.16b , # 8
315315
316316 // v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
317317 adr_l x4 , .Lbyteshift_table + 16
@@ -344,8 +344,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
344344
345345 // Load the first 16 data bytes.
346346 ldr q7 , [ buf ], # 0x10
347- CPU_LE( rev64 v7.16b, v7.16b )
348- CPU_LE( ext v7.16b, v7.16b , v7.16b , # 8 )
347+ rev64 v7.16b , v7.16b
348+ ext v7.16b , v7.16b , v7.16b , # 8
349349
350350 // XOR the first 16 data *bits* with the initial CRC value.
351351 movi v0.16b , # 0
@@ -382,8 +382,8 @@ SYM_FUNC_START(crc_t10dif_pmull_p8)
382382
383383 crc_t10dif_pmull p8
384384
385- CPU_LE( rev64 v7.16b, v7.16b )
386- CPU_LE( ext v7.16b, v7.16b , v7.16b , # 8 )
385+ rev64 v7.16b , v7.16b
386+ ext v7.16b , v7.16b , v7.16b , # 8
387387 str q7 , [ x3 ]
388388
389389 frame_pop
0 commit comments