Skip to content

Commit fb59f70

Browse files
HecaiYuan authored and mr-c committed
simde/x86/avx512/fixupimm.h: work around GCC bug 121064
Use simde_memcpy instead of direct assignment to prevent GCC from generating incorrect vshuf.w instructions on LoongArch with -Ofast. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121064
1 parent 9abb59c commit fb59f70

2 files changed

Lines changed: 9 additions & 2 deletions

File tree

simde/simde-common.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1079,7 +1079,7 @@ HEDLEY_DIAGNOSTIC_POP
1079 | 1079 |   # if HEDLEY_GCC_VERSION_CHECK(16,0,0)
1080 | 1080 |   # define SIMDE_BUG_GCC_123807
1081 | 1081 |   # endif
1082 |      | - # if defined(SIMDE_LOONGARCH_LSX_NATIVE) && \
     | 1082 | + # if defined(SIMDE_ARCH_LOONGARCH) && \
1083 | 1083 |   ((HEDLEY_GCC_VERSION_CHECK(14,0,0) && !HEDLEY_GCC_VERSION_CHECK(14,4,0)) || \
1084 | 1084 |   (HEDLEY_GCC_VERSION_CHECK(15,0,0) && !HEDLEY_GCC_VERSION_CHECK(15,2,0)))
1085 | 1085 |   # define SIMDE_BUG_GCC_121064

simde/x86/avx512/fixupimm.h

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,14 @@ simde_mm_fixupimm_ss (simde__m128 a, simde__m128 b, simde__m128i c, int imm8)
401 | 401 |
402 | 402 |   switch (((c_.i32[0] >> (select << 2)) & 15)) {
403 | 403 |   case 0:
404 |     | - b_.f32[0] = a_.f32[0];
    | 404 | + #if defined(SIMDE_BUG_GCC_121064)
    | 405 | + {
    | 406 | + simde_float32 tmp = a_.f32[0];
    | 407 | + simde_memcpy(&b_.f32[0], &tmp, sizeof(tmp));
    | 408 | + }
    | 409 | + #else
    | 410 | + b_.f32[0] = a_.f32[0];
    | 411 | + #endif
405 | 412 |   break;
406 | 413 |   case 2:
407 | 414 |   b_.f32[0] = SIMDE_MATH_NANF;

0 commit comments

Comments
 (0)