Skip to content

Commit d142ab3

Browse files
committed
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers:

 - Several improvements related to crc_kunit, to align with the standard
   KUnit conventions and make it easier for developers and CI systems to
   run this test suite

 - Add an arm64-optimized implementation of CRC64-NVME

 - Remove unused code for big endian arm64

* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  lib/crc: arm64: Simplify intrinsics implementation
  lib/crc: arm64: Use existing macros for kernel-mode FPU cflags
  lib/crc: arm64: Drop unnecessary chunking logic from crc64
  lib/crc: arm64: Assume a little-endian kernel
  lib/crc: arm64: add NEON accelerated CRC64-NVMe implementation
  lib/crc: arm64: Drop check for CONFIG_KERNEL_MODE_NEON
  crypto: crc32c - Remove another outdated comment
  crypto: crc32c - Remove more outdated usage information
  kunit: configs: Enable all CRC tests in all_tests.config
  lib/crc: tests: Add a .kunitconfig file
  lib/crc: tests: Add CRC_ENABLE_ALL_FOR_KUNIT
  lib/crc: tests: Make crc_kunit test only the enabled CRC variants
2 parents 370c388 + 8fdef85 commit d142ab3

10 files changed

Lines changed: 172 additions & 65 deletions

File tree

crypto/crc32c.c

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// SPDX-License-Identifier: GPL-2.0-or-later
22
/*
3-
* Cryptographic API.
4-
*
5-
* CRC32C chksum
3+
* crypto_shash support for CRC-32C
64
*
75
*@Article{castagnoli-crc,
86
* author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman},
@@ -15,16 +13,6 @@
1513
* pages = {},
1614
* month = {June},
1715
*}
18-
* Used by the iSCSI driver, possibly others, and derived from
19-
* the iscsi-crc.c module of the linux-iscsi driver at
20-
* http://linux-iscsi.sourceforge.net.
21-
*
22-
* Following the example of lib/crc32, this function is intended to be
23-
* flexible and useful for all users. Modules that currently have their
24-
* own crc32c, but hopefully may be able to use this one are:
25-
* net/sctp (please add all your doco to here if you change to
26-
* use this one!)
27-
* <endoflist>
2816
*
2917
* Copyright (c) 2004 Cisco Systems, Inc.
3018
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
@@ -49,11 +37,6 @@ struct chksum_desc_ctx {
4937
u32 crc;
5038
};
5139

52-
/*
53-
* Steps through buffer one byte at a time, calculates reflected
54-
* crc using table.
55-
*/
56-
5740
static int chksum_init(struct shash_desc *desc)
5841
{
5942
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);

lib/crc/.kunitconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
CONFIG_KUNIT=y
2+
CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y
3+
CONFIG_CRC_KUNIT_TEST=y

lib/crc/Kconfig

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ config CRC_T10DIF_ARCH
4848
bool
4949
depends on CRC_T10DIF && CRC_OPTIMIZATIONS
5050
default y if ARM && KERNEL_MODE_NEON
51-
default y if ARM64 && KERNEL_MODE_NEON
51+
default y if ARM64
5252
default y if PPC64 && ALTIVEC
5353
default y if RISCV && RISCV_ISA_ZBC
5454
default y if X86
@@ -82,6 +82,7 @@ config CRC64
8282
config CRC64_ARCH
8383
bool
8484
depends on CRC64 && CRC_OPTIMIZATIONS
85+
default y if ARM64
8586
default y if RISCV && RISCV_ISA_ZBC && 64BIT
8687
default y if X86_64
8788

@@ -99,18 +100,27 @@ config CRC_OPTIMIZATIONS
99100

100101
config CRC_KUNIT_TEST
101102
tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS
102-
depends on KUNIT
103+
depends on KUNIT && (CRC7 || CRC16 || CRC_T10DIF || CRC32 || CRC64)
103104
default KUNIT_ALL_TESTS
105+
help
106+
Unit tests for the CRC library functions.
107+
108+
This is intended to help people writing architecture-specific
109+
optimized versions. If unsure, say N.
110+
111+
config CRC_ENABLE_ALL_FOR_KUNIT
112+
tristate "Enable all CRC functions for KUnit test"
113+
depends on KUNIT
104114
select CRC7
105115
select CRC16
106116
select CRC_T10DIF
107117
select CRC32
108118
select CRC64
109119
help
110-
Unit tests for the CRC library functions.
120+
Enable all CRC functions that have test code in CRC_KUNIT_TEST.
111121

112-
This is intended to help people writing architecture-specific
113-
optimized versions. If unsure, say N.
122+
Enable this only if you'd like the CRC KUnit test suite to test all
123+
the CRC variants, even ones that wouldn't otherwise need to be built.
114124

115125
config CRC_BENCHMARK
116126
bool "Benchmark for the CRC functions"

lib/crc/Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,14 @@ obj-$(CONFIG_CRC64) += crc64.o
3838
crc64-y := crc64-main.o
3939
ifeq ($(CONFIG_CRC64_ARCH),y)
4040
CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH)
41+
42+
CFLAGS_REMOVE_arm64/crc64-neon-inner.o += $(CC_FLAGS_NO_FPU)
43+
CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) -march=armv8-a+crypto
44+
crc64-$(CONFIG_ARM64) += arm64/crc64-neon-inner.o
45+
4146
crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o
4247
crc64-$(CONFIG_X86) += x86/crc64-pclmul.o
43-
endif
48+
endif # CONFIG_CRC64_ARCH
4449

4550
obj-y += tests/
4651

lib/crc/arm64/crc-t10dif-core.S

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -181,13 +181,13 @@ SYM_FUNC_END(__pmull_p8_16x64)
181181

182182
pmull16x64_\p fold_consts, \reg1, v8
183183

184-
CPU_LE( rev64 v11.16b, v11.16b )
185-
CPU_LE( rev64 v12.16b, v12.16b )
184+
rev64 v11.16b, v11.16b
185+
rev64 v12.16b, v12.16b
186186

187187
pmull16x64_\p fold_consts, \reg2, v9
188188

189-
CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
190-
CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
189+
ext v11.16b, v11.16b, v11.16b, #8
190+
ext v12.16b, v12.16b, v12.16b, #8
191191

192192
eor \reg1\().16b, \reg1\().16b, v8.16b
193193
eor \reg2\().16b, \reg2\().16b, v9.16b
@@ -220,22 +220,22 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
220220
ldp q4, q5, [buf, #0x40]
221221
ldp q6, q7, [buf, #0x60]
222222
add buf, buf, #0x80
223-
CPU_LE( rev64 v0.16b, v0.16b )
224-
CPU_LE( rev64 v1.16b, v1.16b )
225-
CPU_LE( rev64 v2.16b, v2.16b )
226-
CPU_LE( rev64 v3.16b, v3.16b )
227-
CPU_LE( rev64 v4.16b, v4.16b )
228-
CPU_LE( rev64 v5.16b, v5.16b )
229-
CPU_LE( rev64 v6.16b, v6.16b )
230-
CPU_LE( rev64 v7.16b, v7.16b )
231-
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
232-
CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
233-
CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
234-
CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 )
235-
CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 )
236-
CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
237-
CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
238-
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
223+
rev64 v0.16b, v0.16b
224+
rev64 v1.16b, v1.16b
225+
rev64 v2.16b, v2.16b
226+
rev64 v3.16b, v3.16b
227+
rev64 v4.16b, v4.16b
228+
rev64 v5.16b, v5.16b
229+
rev64 v6.16b, v6.16b
230+
rev64 v7.16b, v7.16b
231+
ext v0.16b, v0.16b, v0.16b, #8
232+
ext v1.16b, v1.16b, v1.16b, #8
233+
ext v2.16b, v2.16b, v2.16b, #8
234+
ext v3.16b, v3.16b, v3.16b, #8
235+
ext v4.16b, v4.16b, v4.16b, #8
236+
ext v5.16b, v5.16b, v5.16b, #8
237+
ext v6.16b, v6.16b, v6.16b, #8
238+
ext v7.16b, v7.16b, v7.16b, #8
239239

240240
// XOR the first 16 data *bits* with the initial CRC value.
241241
movi v8.16b, #0
@@ -288,8 +288,8 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
288288
pmull16x64_\p fold_consts, v7, v8
289289
eor v7.16b, v7.16b, v8.16b
290290
ldr q0, [buf], #16
291-
CPU_LE( rev64 v0.16b, v0.16b )
292-
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
291+
rev64 v0.16b, v0.16b
292+
ext v0.16b, v0.16b, v0.16b, #8
293293
eor v7.16b, v7.16b, v0.16b
294294
subs len, len, #16
295295
b.ge .Lfold_16_bytes_loop_\@
@@ -310,8 +310,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
310310
// v0 = last 16 original data bytes
311311
add buf, buf, len
312312
ldr q0, [buf, #-16]
313-
CPU_LE( rev64 v0.16b, v0.16b )
314-
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
313+
rev64 v0.16b, v0.16b
314+
ext v0.16b, v0.16b, v0.16b, #8
315315

316316
// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
317317
adr_l x4, .Lbyteshift_table + 16
@@ -344,8 +344,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
344344

345345
// Load the first 16 data bytes.
346346
ldr q7, [buf], #0x10
347-
CPU_LE( rev64 v7.16b, v7.16b )
348-
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
347+
rev64 v7.16b, v7.16b
348+
ext v7.16b, v7.16b, v7.16b, #8
349349

350350
// XOR the first 16 data *bits* with the initial CRC value.
351351
movi v0.16b, #0
@@ -382,8 +382,8 @@ SYM_FUNC_START(crc_t10dif_pmull_p8)
382382

383383
crc_t10dif_pmull p8
384384

385-
CPU_LE( rev64 v7.16b, v7.16b )
386-
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
385+
rev64 v7.16b, v7.16b
386+
ext v7.16b, v7.16b, v7.16b, #8
387387
str q7, [x3]
388388

389389
frame_pop

lib/crc/arm64/crc32-core.S

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,19 @@
2929
.endm
3030

3131
.macro hwordle, reg
32-
CPU_BE( rev16 \reg, \reg )
3332
.endm
3433

3534
.macro hwordbe, reg
36-
CPU_LE( rev \reg, \reg )
35+
rev \reg, \reg
3736
rbit \reg, \reg
38-
CPU_BE( lsr \reg, \reg, #16 )
3937
.endm
4038

4139
.macro le, regs:vararg
42-
.irp r, \regs
43-
CPU_BE( rev \r, \r )
44-
.endr
4540
.endm
4641

4742
.macro be, regs:vararg
4843
.irp r, \regs
49-
CPU_LE( rev \r, \r )
44+
rev \r, \r
5045
.endr
5146
.irp r, \regs
5247
rbit \r, \r

lib/crc/arm64/crc64-neon-inner.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Accelerated CRC64 (NVMe) using ARM NEON C intrinsics
4+
*/
5+
6+
#include <linux/types.h>
7+
#include <asm/neon-intrinsics.h>
8+
9+
/* Forward declaration; definition below in this file. */
u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
10+
11+
/*
 * Folding constants.  G is presumably the bit-reflected CRC64-NVME
 * generator polynomial — TODO confirm against lib/crc/crc64-main.c.
 */
/* x^191 mod G, x^127 mod G */
12+
static const u64 fold_consts_val[2] = { 0xeadc41fd2ba3d420ULL,
13+
0x21e9761e252621acULL };
/* Barrett reduction constants. */
/* floor(x^127 / G), (G - x^64) / x */
15+
static const u64 bconsts_val[2] = { 0x27ecfa329aef9f77ULL,
16+
0x34d926535897936aULL };
17+
18+
/*
 * Carryless (polynomial) multiply of the low 64-bit lanes of a and b,
 * returning the 128-bit product as a uint64x2_t.
 */
static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
{
	u64 a_lo = vgetq_lane_u64(a, 0);
	u64 b_lo = vgetq_lane_u64(b, 0);

	return vreinterpretq_u64_p128(vmull_p64(a_lo, b_lo));
}
23+
24+
/*
 * Carryless (polynomial) multiply of the high 64-bit lanes of a and b,
 * returning the 128-bit product as a uint64x2_t.
 */
static inline uint64x2_t pmull64_high(uint64x2_t a, uint64x2_t b)
{
	return vreinterpretq_u64_p128(
		vmull_high_p64(vreinterpretq_p64_u64(a),
			       vreinterpretq_p64_u64(b)));
}
31+
32+
/*
 * Carryless (polynomial) multiply of the high 64-bit lane of a by the
 * low 64-bit lane of b, returning the 128-bit product as a uint64x2_t.
 */
static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
{
	u64 a_hi = vgetq_lane_u64(a, 1);
	u64 b_lo = vgetq_lane_u64(b, 0);

	return vreinterpretq_u64_p128(vmull_p64(a_hi, b_lo));
}
37+
38+
/*
 * Compute the CRC64-NVME of the buffer using PMULL-based folding.
 *
 * @crc: initial CRC value
 * @p:   data to checksum
 * @len: number of bytes; the caller (crc64_nvme_arch in crc64.h) passes
 *       a multiple of 16 that is at least 16
 *
 * Must be called with kernel-mode NEON enabled.
 */
u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len)
{
	const uint64x2_t consts = vld1q_u64(fold_consts_val);
	const uint64x2_t zero = { };
	uint64x2_t acc = { crc, 0 };

	/* Fold the message into the 128-bit accumulator, 16 bytes at a time. */
	do {
		acc ^= vreinterpretq_u64_u8(vld1q_u8(p));
		p += 16;
		len -= 16;
		if (len < 16)
			break;
		acc = pmull64(consts, acc) ^ pmull64_high(consts, acc);
	} while (1);

	/* Multiply the 128-bit value by x^64 and reduce it back to 128 bits. */
	acc = vextq_u64(acc, zero, 1) ^ pmull64_hi_lo(consts, acc);

	/* Final Barrett reduction from 128 bits down to the 64-bit CRC. */
	{
		const uint64x2_t bconsts = vld1q_u64(bconsts_val);
		uint64x2_t quot = pmull64(bconsts, acc);

		acc ^= vextq_u64(zero, quot, 1) ^ pmull64_hi_lo(bconsts, quot);
	}

	return vgetq_lane_u64(acc, 1);
}

lib/crc/arm64/crc64.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* CRC64 using ARM64 PMULL instructions
4+
*/
5+
6+
#include <linux/cpufeature.h>
7+
#include <asm/simd.h>
8+
#include <linux/minmax.h>
9+
#include <linux/sizes.h>
10+
11+
/* NEON implementation in arm64/crc64-neon-inner.c. */
u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
12+
13+
/* No arm64-optimized CRC64-BE: fall back to the generic implementation. */
#define crc64_be_arch crc64_be_generic
14+
15+
/*
 * CRC64-NVME entry point for arm64.
 *
 * For buffers of at least 128 bytes, when the CPU has the PMULL feature
 * and SIMD may be used in the current context, checksum the largest
 * 16-byte-multiple prefix with the NEON implementation and let the
 * generic code handle the remainder (or the whole buffer otherwise).
 */
static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
{
	if (len >= 128 && cpu_have_named_feature(PMULL) &&
	    likely(may_use_simd())) {
		size_t tail = len & 15;		/* leftover bytes (< 16) */
		size_t bulk = len - tail;	/* multiple of 16 */

		scoped_ksimd()
			crc = crc64_nvme_arm64_c(crc, p, bulk);

		p += bulk;
		len = tail;
	}
	return crc64_nvme_generic(crc, p, len);
}

0 commit comments

Comments
 (0)