Skip to content

Commit 370c388

Browse files
committed
Merge tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull crypto library updates from Eric Biggers: - Migrate more hash algorithms from the traditional crypto subsystem to lib/crypto/ Like the algorithms migrated earlier (e.g. SHA-*), this simplifies the implementations, improves performance, enables further simplifications in calling code, and solves various other issues: - AES CBC-based MACs (AES-CMAC, AES-XCBC-MAC, and AES-CBC-MAC) - Support these algorithms in lib/crypto/ using the AES library and the existing arm64 assembly code - Reimplement the traditional crypto API's "cmac(aes)", "xcbc(aes)", and "cbcmac(aes)" on top of the library - Convert mac80211 to use the AES-CMAC library. Note: several other subsystems can use it too and will be converted later - Drop the broken, nonstandard, and likely unused support for "xcbc(aes)" with key lengths other than 128 bits - Enable optimizations by default - GHASH - Migrate the standalone GHASH code into lib/crypto/ - Integrate the GHASH code more closely with the very similar POLYVAL code, and improve the generic GHASH implementation to resist cache-timing attacks and use much less memory - Reimplement the AES-GCM library and the "gcm" crypto_aead template on top of the GHASH library. 
Remove "ghash" from the crypto_shash API, as it's no longer needed - Enable optimizations by default - SM3 - Migrate the kernel's existing SM3 code into lib/crypto/, and reimplement the traditional crypto API's "sm3" on top of it - I don't recommend using SM3, but this cleanup is worthwhile to organize the code the same way as other algorithms - Testing improvements: - Add a KUnit test suite for each of the new library APIs - Migrate the existing ChaCha20Poly1305 test to KUnit - Make the KUnit all_tests.config enable all crypto library tests - Move the test kconfig options to the Runtime Testing menu - Other updates to arch-optimized crypto code: - Optimize SHA-256 for Zhaoxin CPUs using the Padlock Hash Engine - Remove some MD5 implementations that are no longer worth keeping - Drop big endian and voluntary preemption support from the arm64 code, as those configurations are no longer supported on arm64 - Make jitterentropy and samples/tsm-mr use the crypto library APIs * tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (66 commits) lib/crypto: arm64: Assume a little-endian kernel arm64: fpsimd: Remove obsolete cond_yield macro lib/crypto: arm64/sha3: Remove obsolete chunking logic lib/crypto: arm64/sha512: Remove obsolete chunking logic lib/crypto: arm64/sha256: Remove obsolete chunking logic lib/crypto: arm64/sha1: Remove obsolete chunking logic lib/crypto: arm64/poly1305: Remove obsolete chunking logic lib/crypto: arm64/gf128hash: Remove obsolete chunking logic lib/crypto: arm64/chacha: Remove obsolete chunking logic lib/crypto: arm64/aes: Remove obsolete chunking logic lib/crypto: Include <crypto/utils.h> instead of <crypto/algapi.h> lib/crypto: aesgcm: Don't disable IRQs during AES block encryption lib/crypto: aescfb: Don't disable IRQs during AES block encryption lib/crypto: tests: Migrate ChaCha20Poly1305 self-test to KUnit lib/crypto: sparc: Drop optimized MD5 code lib/crypto: mips: Drop optimized MD5 code lib: Move 
crypto library tests to Runtime Testing menu crypto: sm3 - Remove 'struct sm3_state' crypto: sm3 - Remove the original "sm3_block_generic()" crypto: sm3 - Remove sm3_base.h ...
2 parents 9932f00 + 12b11e4 commit 370c388

154 files changed

Lines changed: 4879 additions & 4875 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

MAINTAINERS

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12279,10 +12279,10 @@ F: arch/powerpc/crypto/aes_cbc.c
1227912279
F: arch/powerpc/crypto/aes_ctr.c
1228012280
F: arch/powerpc/crypto/aes_xts.c
1228112281
F: arch/powerpc/crypto/aesp8-ppc.*
12282-
F: arch/powerpc/crypto/ghash.c
12283-
F: arch/powerpc/crypto/ghashp8-ppc.pl
1228412282
F: arch/powerpc/crypto/ppc-xlate.pl
1228512283
F: arch/powerpc/crypto/vmx.c
12284+
F: lib/crypto/powerpc/gf128hash.h
12285+
F: lib/crypto/powerpc/ghashp8-ppc.pl
1228612286

1228712287
IBM ServeRAID RAID DRIVER
1228812288
S: Orphan

arch/arm/crypto/Kconfig

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,26 +3,17 @@
33
menu "Accelerated Cryptographic Algorithms for CPU (arm)"
44

55
config CRYPTO_GHASH_ARM_CE
6-
tristate "Hash functions: GHASH (PMULL/NEON/ARMv8 Crypto Extensions)"
6+
tristate "AEAD cipher: AES in GCM mode (ARMv8 Crypto Extensions)"
77
depends on KERNEL_MODE_NEON
88
select CRYPTO_AEAD
9-
select CRYPTO_HASH
10-
select CRYPTO_CRYPTD
119
select CRYPTO_LIB_AES
1210
select CRYPTO_LIB_GF128MUL
1311
help
14-
GCM GHASH function (NIST SP800-38D)
12+
AEAD cipher: AES-GCM
1513

1614
Architecture: arm using
17-
- PMULL (Polynomial Multiply Long) instructions
18-
- NEON (Advanced SIMD) extensions
1915
- ARMv8 Crypto Extensions
2016

21-
Use an implementation of GHASH (used by the GCM AEAD chaining mode)
22-
that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
23-
that is part of the ARMv8 Crypto Extensions, or a slower variant that
24-
uses the vmull.p8 instruction that is part of the basic NEON ISA.
25-
2617
config CRYPTO_AES_ARM_BS
2718
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)"
2819
depends on KERNEL_MODE_NEON

arch/arm/crypto/ghash-ce-core.S

Lines changed: 14 additions & 157 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* SPDX-License-Identifier: GPL-2.0-only */
22
/*
3-
* Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
3+
* Accelerated AES-GCM implementation with ARMv8 Crypto Extensions.
44
*
55
* Copyright (C) 2015 - 2017 Linaro Ltd.
66
* Copyright (C) 2023 Google LLC. <ardb@google.com>
@@ -29,39 +29,10 @@
2929
XM_H .req d7
3030
XH_L .req d8
3131

32-
t0l .req d10
33-
t0h .req d11
34-
t1l .req d12
35-
t1h .req d13
36-
t2l .req d14
37-
t2h .req d15
38-
t3l .req d16
39-
t3h .req d17
40-
t4l .req d18
41-
t4h .req d19
42-
43-
t0q .req q5
44-
t1q .req q6
45-
t2q .req q7
46-
t3q .req q8
47-
t4q .req q9
4832
XH2 .req q9
4933

50-
s1l .req d20
51-
s1h .req d21
52-
s2l .req d22
53-
s2h .req d23
54-
s3l .req d24
55-
s3h .req d25
56-
s4l .req d26
57-
s4h .req d27
58-
5934
MASK .req d28
60-
SHASH2_p8 .req d28
6135

62-
k16 .req d29
63-
k32 .req d30
64-
k48 .req d31
6536
SHASH2_p64 .req d31
6637

6738
HH .req q10
@@ -93,72 +64,6 @@
9364

9465
.text
9566

96-
.macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4
97-
vmull.p64 \rd, \rn, \rm
98-
.endm
99-
100-
/*
101-
* This implementation of 64x64 -> 128 bit polynomial multiplication
102-
* using vmull.p8 instructions (8x8 -> 16) is taken from the paper
103-
* "Fast Software Polynomial Multiplication on ARM Processors Using
104-
* the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
105-
* Ricardo Dahab (https://hal.inria.fr/hal-01506572)
106-
*
107-
* It has been slightly tweaked for in-order performance, and to allow
108-
* 'rq' to overlap with 'ad' or 'bd'.
109-
*/
110-
.macro __pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
111-
vext.8 t0l, \ad, \ad, #1 @ A1
112-
.ifc \b1, t4l
113-
vext.8 t4l, \bd, \bd, #1 @ B1
114-
.endif
115-
vmull.p8 t0q, t0l, \bd @ F = A1*B
116-
vext.8 t1l, \ad, \ad, #2 @ A2
117-
vmull.p8 t4q, \ad, \b1 @ E = A*B1
118-
.ifc \b2, t3l
119-
vext.8 t3l, \bd, \bd, #2 @ B2
120-
.endif
121-
vmull.p8 t1q, t1l, \bd @ H = A2*B
122-
vext.8 t2l, \ad, \ad, #3 @ A3
123-
vmull.p8 t3q, \ad, \b2 @ G = A*B2
124-
veor t0q, t0q, t4q @ L = E + F
125-
.ifc \b3, t4l
126-
vext.8 t4l, \bd, \bd, #3 @ B3
127-
.endif
128-
vmull.p8 t2q, t2l, \bd @ J = A3*B
129-
veor t0l, t0l, t0h @ t0 = (L) (P0 + P1) << 8
130-
veor t1q, t1q, t3q @ M = G + H
131-
.ifc \b4, t3l
132-
vext.8 t3l, \bd, \bd, #4 @ B4
133-
.endif
134-
vmull.p8 t4q, \ad, \b3 @ I = A*B3
135-
veor t1l, t1l, t1h @ t1 = (M) (P2 + P3) << 16
136-
vmull.p8 t3q, \ad, \b4 @ K = A*B4
137-
vand t0h, t0h, k48
138-
vand t1h, t1h, k32
139-
veor t2q, t2q, t4q @ N = I + J
140-
veor t0l, t0l, t0h
141-
veor t1l, t1l, t1h
142-
veor t2l, t2l, t2h @ t2 = (N) (P4 + P5) << 24
143-
vand t2h, t2h, k16
144-
veor t3l, t3l, t3h @ t3 = (K) (P6 + P7) << 32
145-
vmov.i64 t3h, #0
146-
vext.8 t0q, t0q, t0q, #15
147-
veor t2l, t2l, t2h
148-
vext.8 t1q, t1q, t1q, #14
149-
vmull.p8 \rq, \ad, \bd @ D = A*B
150-
vext.8 t2q, t2q, t2q, #13
151-
vext.8 t3q, t3q, t3q, #12
152-
veor t0q, t0q, t1q
153-
veor t2q, t2q, t3q
154-
veor \rq, \rq, t0q
155-
veor \rq, \rq, t2q
156-
.endm
157-
158-
//
159-
// PMULL (64x64->128) based reduction for CPUs that can do
160-
// it in a single instruction.
161-
//
16267
.macro __pmull_reduce_p64
16368
vmull.p64 T1, XL_L, MASK
16469

@@ -170,30 +75,7 @@
17075
vmull.p64 XL, T1_H, MASK
17176
.endm
17277

173-
//
174-
// Alternative reduction for CPUs that lack support for the
175-
// 64x64->128 PMULL instruction
176-
//
177-
.macro __pmull_reduce_p8
178-
veor XL_H, XL_H, XM_L
179-
veor XH_L, XH_L, XM_H
180-
181-
vshl.i64 T1, XL, #57
182-
vshl.i64 T2, XL, #62
183-
veor T1, T1, T2
184-
vshl.i64 T2, XL, #63
185-
veor T1, T1, T2
186-
veor XL_H, XL_H, T1_L
187-
veor XH_L, XH_L, T1_H
188-
189-
vshr.u64 T1, XL, #1
190-
veor XH, XH, XL
191-
veor XL, XL, T1
192-
vshr.u64 T1, T1, #6
193-
vshr.u64 XL, XL, #1
194-
.endm
195-
196-
.macro ghash_update, pn, enc, aggregate=1, head=1
78+
.macro ghash_update, enc, aggregate=1, head=1
19779
vld1.64 {XL}, [r1]
19880

19981
.if \head
@@ -206,8 +88,7 @@
20688
b 3f
20789
.endif
20890

209-
0: .ifc \pn, p64
210-
.if \aggregate
91+
0: .if \aggregate
21192
tst r0, #3 // skip until #blocks is a
21293
bne 2f // round multiple of 4
21394

@@ -288,7 +169,6 @@
288169

289170
b 1b
290171
.endif
291-
.endif
292172

293173
2: vld1.8 {T1}, [r2]!
294174

@@ -308,15 +188,15 @@
308188
veor T1_L, T1_L, XL_H
309189
veor XL, XL, IN1
310190

311-
__pmull_\pn XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h @ a1 * b1
191+
vmull.p64 XH, XL_H, SHASH_H @ a1 * b1
312192
veor T1, T1, XL
313-
__pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0
314-
__pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0)
193+
vmull.p64 XL, XL_L, SHASH_L @ a0 * b0
194+
vmull.p64 XM, T1_L, SHASH2_p64 @ (a1+a0)(b1+b0)
315195

316196
4: veor T1, XL, XH
317197
veor XM, XM, T1
318198

319-
__pmull_reduce_\pn
199+
__pmull_reduce_p64
320200

321201
veor T1, T1, XH
322202
veor XL, XL, T1
@@ -325,8 +205,8 @@
325205
.endm
326206

327207
/*
328-
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
329-
* struct ghash_key const *k, const char *head)
208+
* void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
209+
* u64 const h[4][2], const char *head)
330210
*/
331211
ENTRY(pmull_ghash_update_p64)
332212
vld1.64 {SHASH}, [r3]!
@@ -341,35 +221,12 @@ ENTRY(pmull_ghash_update_p64)
341221
vmov.i8 MASK, #0xe1
342222
vshl.u64 MASK, MASK, #57
343223

344-
ghash_update p64
224+
ghash_update
345225
vst1.64 {XL}, [r1]
346226

347227
bx lr
348228
ENDPROC(pmull_ghash_update_p64)
349229

350-
ENTRY(pmull_ghash_update_p8)
351-
vld1.64 {SHASH}, [r3]
352-
veor SHASH2_p8, SHASH_L, SHASH_H
353-
354-
vext.8 s1l, SHASH_L, SHASH_L, #1
355-
vext.8 s2l, SHASH_L, SHASH_L, #2
356-
vext.8 s3l, SHASH_L, SHASH_L, #3
357-
vext.8 s4l, SHASH_L, SHASH_L, #4
358-
vext.8 s1h, SHASH_H, SHASH_H, #1
359-
vext.8 s2h, SHASH_H, SHASH_H, #2
360-
vext.8 s3h, SHASH_H, SHASH_H, #3
361-
vext.8 s4h, SHASH_H, SHASH_H, #4
362-
363-
vmov.i64 k16, #0xffff
364-
vmov.i64 k32, #0xffffffff
365-
vmov.i64 k48, #0xffffffffffff
366-
367-
ghash_update p8
368-
vst1.64 {XL}, [r1]
369-
370-
bx lr
371-
ENDPROC(pmull_ghash_update_p8)
372-
373230
e0 .req q9
374231
e1 .req q10
375232
e2 .req q11
@@ -536,7 +393,7 @@ ENTRY(pmull_gcm_encrypt)
536393

537394
vld1.64 {SHASH}, [r3]
538395

539-
ghash_update p64, enc, head=0
396+
ghash_update enc, head=0
540397
vst1.64 {XL}, [r1]
541398

542399
pop {r4-r8, pc}
@@ -554,7 +411,7 @@ ENTRY(pmull_gcm_decrypt)
554411

555412
vld1.64 {SHASH}, [r3]
556413

557-
ghash_update p64, dec, head=0
414+
ghash_update dec, head=0
558415
vst1.64 {XL}, [r1]
559416

560417
pop {r4-r8, pc}
@@ -603,7 +460,7 @@ ENTRY(pmull_gcm_enc_final)
603460
vshl.u64 MASK, MASK, #57
604461
mov r0, #1
605462
bne 3f // process head block first
606-
ghash_update p64, aggregate=0, head=0
463+
ghash_update aggregate=0, head=0
607464

608465
vrev64.8 XL, XL
609466
vext.8 XL, XL, XL, #8
@@ -660,7 +517,7 @@ ENTRY(pmull_gcm_dec_final)
660517
vshl.u64 MASK, MASK, #57
661518
mov r0, #1
662519
bne 3f // process head block first
663-
ghash_update p64, aggregate=0, head=0
520+
ghash_update aggregate=0, head=0
664521

665522
vrev64.8 XL, XL
666523
vext.8 XL, XL, XL, #8

0 commit comments

Comments (0)