Skip to content

Commit df6922d

Browse files
authored
Fix cross-compiling librt by enabling x86_64 optimisations with pragmas (#20815)
Although Python still lacks a supported method to cross-compile packages, many downstreams do it anyway, so librt should allow this.

The current approach of enabling x86_64 optimisations after checking `platform.machine()` is broken. When building on x86_64 for another architecture, the build fails because the optimisations get enabled when they shouldn't. Conversely, when building on another architecture for x86_64, the build fails because the optimisations don't get enabled when they should.

GCC supports enabling an optimisation with a pragma from that line onwards. Clang requires the optimisation to be pushed on and popped off the stack. Unfortunately, MSVC does not have an equivalent feature, but it is unlikely anyone would cross-compile to x86_64 with it. The remaining logic for MSVC could be simplified, but it looks like other compilers, such as Borland, are potentially supported.

I considered checking for x86_64 using CCompiler's `preprocess()` instead, but this seemed awkward.

This has been tested with GCC and Clang, including with AVX512 enabled.

Supersedes mypyc/librt#32.
1 parent 694f767 commit df6922d

8 files changed

Lines changed: 50 additions & 16 deletions

File tree

mypyc/build_setup.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,11 @@
1919
from distutils import ccompiler
2020

2121
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
22-
"unix": {
23-
"base64/arch/ssse3": ["-mssse3"],
24-
"base64/arch/sse41": ["-msse4.1"],
25-
"base64/arch/sse42": ["-msse4.2"],
26-
"base64/arch/avx2": ["-mavx2"],
27-
"base64/arch/avx": ["-mavx"],
28-
},
2922
"msvc": {
3023
"base64/arch/sse42": ["/arch:SSE4.2"],
3124
"base64/arch/avx2": ["/arch:AVX2"],
3225
"base64/arch/avx": ["/arch:AVX"],
33-
},
26+
}
3427
}
3528

3629
ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined]

mypyc/lib-rt/base64/arch/avx/codec.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include "../../env.h"
1010

1111
#if HAVE_AVX
12+
#if defined(__clang__)
13+
#pragma clang attribute push (__attribute__((target("avx"))), apply_to=function)
14+
#else
15+
#pragma GCC target("avx")
16+
#endif
1217
#include <immintrin.h>
1318

1419
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
@@ -62,6 +67,9 @@ base64_stream_decode_avx BASE64_DEC_PARAMS
6267
#include "../generic/dec_head.c"
6368
dec_loop_ssse3(&s, &slen, &o, &olen);
6469
#include "../generic/dec_tail.c"
70+
#if defined(__clang__)
71+
#pragma clang attribute pop
72+
#endif
6573
#else
6674
return base64_dec_stub(state, src, srclen, out, outlen);
6775
#endif

mypyc/lib-rt/base64/arch/avx2/codec.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include "../../env.h"
1010

1111
#if HAVE_AVX2
12+
#if defined(__clang__)
13+
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
14+
#else
15+
#pragma GCC target("avx2")
16+
#endif
1217
#include <immintrin.h>
1318

1419
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
@@ -52,6 +57,9 @@ base64_stream_decode_avx2 BASE64_DEC_PARAMS
5257
#include "../generic/dec_head.c"
5358
dec_loop_avx2(&s, &slen, &o, &olen);
5459
#include "../generic/dec_tail.c"
60+
#if defined(__clang__)
61+
#pragma clang attribute pop
62+
#endif
5563
#else
5664
return base64_dec_stub(state, src, srclen, out, outlen);
5765
#endif

mypyc/lib-rt/base64/arch/avx512/codec.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include "../../env.h"
1010

1111
#if HAVE_AVX512
12+
#if defined(__clang__)
13+
#pragma clang attribute push (__attribute__((target("avx512vbmi"))), apply_to=function)
14+
#else
15+
#pragma GCC target("avx512vbmi")
16+
#endif
1217
#include <immintrin.h>
1318

1419
#include "../avx2/dec_reshuffle.c"
@@ -38,6 +43,9 @@ base64_stream_decode_avx512 BASE64_DEC_PARAMS
3843
#include "../generic/dec_head.c"
3944
dec_loop_avx2(&s, &slen, &o, &olen);
4045
#include "../generic/dec_tail.c"
46+
#if defined(__clang__)
47+
#pragma clang attribute pop
48+
#endif
4149
#else
4250
return base64_dec_stub(state, src, srclen, out, outlen);
4351
#endif

mypyc/lib-rt/base64/arch/sse41/codec.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include "../../env.h"
1010

1111
#if HAVE_SSE41
12+
#if defined(__clang__)
13+
#pragma clang attribute push (__attribute__((target("sse4.1"))), apply_to=function)
14+
#else
15+
#pragma GCC target("sse4.1")
16+
#endif
1217
#include <smmintrin.h>
1318

1419
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
@@ -52,6 +57,9 @@ base64_stream_decode_sse41 BASE64_DEC_PARAMS
5257
#include "../generic/dec_head.c"
5358
dec_loop_ssse3(&s, &slen, &o, &olen);
5459
#include "../generic/dec_tail.c"
60+
#if defined(__clang__)
61+
#pragma clang attribute pop
62+
#endif
5563
#else
5664
return base64_dec_stub(state, src, srclen, out, outlen);
5765
#endif

mypyc/lib-rt/base64/arch/sse42/codec.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include "../../env.h"
1010

1111
#if HAVE_SSE42
12+
#if defined(__clang__)
13+
#pragma clang attribute push (__attribute__((target("sse4.2"))), apply_to=function)
14+
#else
15+
#pragma GCC target("sse4.2")
16+
#endif
1217
#include <nmmintrin.h>
1318

1419
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
@@ -52,6 +57,9 @@ base64_stream_decode_sse42 BASE64_DEC_PARAMS
5257
#include "../generic/dec_head.c"
5358
dec_loop_ssse3(&s, &slen, &o, &olen);
5459
#include "../generic/dec_tail.c"
60+
#if defined(__clang__)
61+
#pragma clang attribute pop
62+
#endif
5563
#else
5664
return base64_dec_stub(state, src, srclen, out, outlen);
5765
#endif

mypyc/lib-rt/base64/arch/ssse3/codec.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include "../../env.h"
1010

1111
#if HAVE_SSSE3
12+
#if defined(__clang__)
13+
#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function)
14+
#else
15+
#pragma GCC target("ssse3")
16+
#endif
1217
#include <tmmintrin.h>
1318

1419
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
@@ -54,6 +59,9 @@ base64_stream_decode_ssse3 BASE64_DEC_PARAMS
5459
#include "../generic/dec_head.c"
5560
dec_loop_ssse3(&s, &slen, &o, &olen);
5661
#include "../generic/dec_tail.c"
62+
#if defined(__clang__)
63+
#pragma clang attribute pop
64+
#endif
5765
#else
5866
return base64_dec_stub(state, src, srclen, out, outlen);
5967
#endif

mypyc/lib-rt/build_setup.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,11 @@
1919
from distutils import ccompiler
2020

2121
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
22-
"unix": {
23-
"base64/arch/ssse3": ["-mssse3"],
24-
"base64/arch/sse41": ["-msse4.1"],
25-
"base64/arch/sse42": ["-msse4.2"],
26-
"base64/arch/avx2": ["-mavx2"],
27-
"base64/arch/avx": ["-mavx"],
28-
},
2922
"msvc": {
3023
"base64/arch/sse42": ["/arch:SSE4.2"],
3124
"base64/arch/avx2": ["/arch:AVX2"],
3225
"base64/arch/avx": ["/arch:AVX"],
33-
},
26+
}
3427
}
3528

3629
ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined]

0 commit comments

Comments
 (0)