Skip to content

Commit 12b11e4

Browse files
Author: Eric Biggers (committed)
lib/crypto: arm64: Assume a little-endian kernel
Since support for big-endian arm64 kernels was removed, the CPU_LE() macro now unconditionally emits the code it is passed, and the CPU_BE() macro now unconditionally discards the code it is passed. Simplify the assembly code in lib/crypto/arm64/ accordingly.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260401003331.144065-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
1 parent 180e92d commit 12b11e4

7 files changed

Lines changed: 36 additions & 65 deletions

File tree

lib/crypto/arm64/aes-cipher-core.S

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -87,11 +87,6 @@
8787
ldp w8, w9, [rk], #16
8888
ldp w10, w11, [rk, #-8]
8989

90-
CPU_BE( rev w4, w4 )
91-
CPU_BE( rev w5, w5 )
92-
CPU_BE( rev w6, w6 )
93-
CPU_BE( rev w7, w7 )
94-
9590
eor w4, w4, w8
9691
eor w5, w5, w9
9792
eor w6, w6, w10
@@ -112,11 +107,6 @@ CPU_BE( rev w7, w7 )
112107
3: adr_l tt, \ltab
113108
\round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
114109

115-
CPU_BE( rev w4, w4 )
116-
CPU_BE( rev w5, w5 )
117-
CPU_BE( rev w6, w6 )
118-
CPU_BE( rev w7, w7 )
119-
120110
stp w4, w5, [out]
121111
stp w6, w7, [out, #8]
122112
ret

lib/crypto/arm64/chacha-neon-core.S

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -531,10 +531,6 @@ SYM_FUNC_START(chacha_4block_xor_neon)
531531
add v3.4s, v3.4s, v19.4s
532532
add a2, a2, w8
533533
add a3, a3, w9
534-
CPU_BE( rev a0, a0 )
535-
CPU_BE( rev a1, a1 )
536-
CPU_BE( rev a2, a2 )
537-
CPU_BE( rev a3, a3 )
538534

539535
ld4r {v24.4s-v27.4s}, [x0], #16
540536
ld4r {v28.4s-v31.4s}, [x0]
@@ -555,10 +551,6 @@ CPU_BE( rev a3, a3 )
555551
add v7.4s, v7.4s, v23.4s
556552
add a6, a6, w8
557553
add a7, a7, w9
558-
CPU_BE( rev a4, a4 )
559-
CPU_BE( rev a5, a5 )
560-
CPU_BE( rev a6, a6 )
561-
CPU_BE( rev a7, a7 )
562554

563555
// x8[0-3] += s2[0]
564556
// x9[0-3] += s2[1]
@@ -576,10 +568,6 @@ CPU_BE( rev a7, a7 )
576568
add v11.4s, v11.4s, v27.4s
577569
add a10, a10, w8
578570
add a11, a11, w9
579-
CPU_BE( rev a8, a8 )
580-
CPU_BE( rev a9, a9 )
581-
CPU_BE( rev a10, a10 )
582-
CPU_BE( rev a11, a11 )
583571

584572
// x12[0-3] += s3[0]
585573
// x13[0-3] += s3[1]
@@ -597,10 +585,6 @@ CPU_BE( rev a11, a11 )
597585
add v15.4s, v15.4s, v31.4s
598586
add a14, a14, w8
599587
add a15, a15, w9
600-
CPU_BE( rev a12, a12 )
601-
CPU_BE( rev a13, a13 )
602-
CPU_BE( rev a14, a14 )
603-
CPU_BE( rev a15, a15 )
604588

605589
// interleave 32-bit words in state n, n+1
606590
ldp w6, w7, [x2], #64

lib/crypto/arm64/ghash-neon-core.S

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,7 @@ SYM_FUNC_START(pmull_ghash_update_p8)
192192
sub x0, x0, #1
193193

194194
/* multiply XL by SHASH in GF(2^128) */
195-
CPU_LE( rev64 T1.16b, T1.16b )
195+
rev64 T1.16b, T1.16b
196196

197197
ext T2.16b, XL.16b, XL.16b, #8
198198
ext IN1.16b, T1.16b, T1.16b, #8

lib/crypto/arm64/sha1-ce-core.S

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,10 @@ SYM_FUNC_START(sha1_ce_transform)
8080
0: ld1 {v8.4s-v11.4s}, [x1], #64
8181
sub x2, x2, #1
8282

83-
CPU_LE( rev32 v8.16b, v8.16b )
84-
CPU_LE( rev32 v9.16b, v9.16b )
85-
CPU_LE( rev32 v10.16b, v10.16b )
86-
CPU_LE( rev32 v11.16b, v11.16b )
83+
rev32 v8.16b, v8.16b
84+
rev32 v9.16b, v9.16b
85+
rev32 v10.16b, v10.16b
86+
rev32 v11.16b, v11.16b
8787

8888
add t0.4s, v8.4s, k0.4s
8989
mov dg0v.16b, dgav.16b

lib/crypto/arm64/sha256-ce.S

Lines changed: 19 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -94,10 +94,10 @@ SYM_FUNC_START(sha256_ce_transform)
9494
0: ld1 {v16.4s-v19.4s}, [x1], #64
9595
sub x2, x2, #1
9696

97-
CPU_LE( rev32 v16.16b, v16.16b )
98-
CPU_LE( rev32 v17.16b, v17.16b )
99-
CPU_LE( rev32 v18.16b, v18.16b )
100-
CPU_LE( rev32 v19.16b, v19.16b )
97+
rev32 v16.16b, v16.16b
98+
rev32 v17.16b, v17.16b
99+
rev32 v18.16b, v18.16b
100+
rev32 v19.16b, v19.16b
101101

102102
add t0.4s, v16.4s, v0.4s
103103
mov dg0v.16b, dgav.16b
@@ -289,14 +289,14 @@ SYM_FUNC_START(sha256_ce_finup2x)
289289
ld1 {v20.4s-v23.4s}, [data2], #64
290290
.Lfinup2x_loop_have_data:
291291
// Convert the words of the data blocks from big endian.
292-
CPU_LE( rev32 v16.16b, v16.16b )
293-
CPU_LE( rev32 v17.16b, v17.16b )
294-
CPU_LE( rev32 v18.16b, v18.16b )
295-
CPU_LE( rev32 v19.16b, v19.16b )
296-
CPU_LE( rev32 v20.16b, v20.16b )
297-
CPU_LE( rev32 v21.16b, v21.16b )
298-
CPU_LE( rev32 v22.16b, v22.16b )
299-
CPU_LE( rev32 v23.16b, v23.16b )
292+
rev32 v16.16b, v16.16b
293+
rev32 v17.16b, v17.16b
294+
rev32 v18.16b, v18.16b
295+
rev32 v19.16b, v19.16b
296+
rev32 v20.16b, v20.16b
297+
rev32 v21.16b, v21.16b
298+
rev32 v22.16b, v22.16b
299+
rev32 v23.16b, v23.16b
300300
.Lfinup2x_loop_have_bswapped_data:
301301

302302
// Save the original state for each block.
@@ -336,19 +336,16 @@ CPU_LE( rev32 v23.16b, v23.16b )
336336
sub w8, len, #64 // w8 = len - 64
337337
add data1, data1, w8, sxtw // data1 += len - 64
338338
add data2, data2, w8, sxtw // data2 += len - 64
339-
CPU_LE( mov x9, #0x80 )
340-
CPU_LE( fmov d16, x9 )
341-
CPU_BE( movi v16.16b, #0 )
342-
CPU_BE( mov x9, #0x8000000000000000 )
343-
CPU_BE( mov v16.d[1], x9 )
339+
mov x9, #0x80
340+
fmov d16, x9
344341
movi v17.16b, #0
345342
stp q16, q17, [sp, #64]
346343
stp q17, q17, [sp, #96]
347344
sub x9, sp, w8, sxtw // x9 = &sp[64 - len]
348345
cmp len, #56
349346
b.ge 1f // will count spill into its own block?
350347
lsl count, count, #3
351-
CPU_LE( rev count, count )
348+
rev count, count
352349
str count, [x9, #56]
353350
mov final_step, #2 // won't need count-only block
354351
b 2f
@@ -393,10 +390,10 @@ CPU_LE( rev count, count )
393390

394391
.Lfinup2x_done:
395392
// Write the two digests with all bytes in the correct order.
396-
CPU_LE( rev32 state0_a.16b, state0_a.16b )
397-
CPU_LE( rev32 state1_a.16b, state1_a.16b )
398-
CPU_LE( rev32 state0_b.16b, state0_b.16b )
399-
CPU_LE( rev32 state1_b.16b, state1_b.16b )
393+
rev32 state0_a.16b, state0_a.16b
394+
rev32 state1_a.16b, state1_a.16b
395+
rev32 state0_b.16b, state0_b.16b
396+
rev32 state1_b.16b, state1_b.16b
400397
st1 {state0_a.4s-state1_a.4s}, [out1]
401398
st1 {state0_b.4s-state1_b.4s}, [out2]
402399
add sp, sp, #128

lib/crypto/arm64/sha512-ce-core.S

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -110,14 +110,14 @@ SYM_FUNC_START(sha512_ce_transform)
110110
ld1 {v16.2d-v19.2d}, [x1], #64
111111
sub x2, x2, #1
112112

113-
CPU_LE( rev64 v12.16b, v12.16b )
114-
CPU_LE( rev64 v13.16b, v13.16b )
115-
CPU_LE( rev64 v14.16b, v14.16b )
116-
CPU_LE( rev64 v15.16b, v15.16b )
117-
CPU_LE( rev64 v16.16b, v16.16b )
118-
CPU_LE( rev64 v17.16b, v17.16b )
119-
CPU_LE( rev64 v18.16b, v18.16b )
120-
CPU_LE( rev64 v19.16b, v19.16b )
113+
rev64 v12.16b, v12.16b
114+
rev64 v13.16b, v13.16b
115+
rev64 v14.16b, v14.16b
116+
rev64 v15.16b, v15.16b
117+
rev64 v16.16b, v16.16b
118+
rev64 v17.16b, v17.16b
119+
rev64 v18.16b, v18.16b
120+
rev64 v19.16b, v19.16b
121121

122122
mov x4, x3 // rc pointer
123123

lib/crypto/arm64/sm3-ce-core.S

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -91,10 +91,10 @@ SYM_FUNC_START(sm3_ce_transform)
9191
mov v15.16b, v8.16b
9292
mov v16.16b, v9.16b
9393

94-
CPU_LE( rev32 v0.16b, v0.16b )
95-
CPU_LE( rev32 v1.16b, v1.16b )
96-
CPU_LE( rev32 v2.16b, v2.16b )
97-
CPU_LE( rev32 v3.16b, v3.16b )
94+
rev32 v0.16b, v0.16b
95+
rev32 v1.16b, v1.16b
96+
rev32 v2.16b, v2.16b
97+
rev32 v3.16b, v3.16b
9898

9999
ext v11.16b, v13.16b, v13.16b, #4
100100

0 commit comments

Comments (0)