11#
22# Encapsulates building FFTW as an External Project.
33#
4- # NOTE: internal building of fftw is for convenience,
5- # and the version of fftw built here does not
6- # use modern hardware optimzations.
4+ # SIMD codelet selection
5+ # ----------------------
6+ # FFTW SIMD codelets are generated C routines (SIMD intrinsics) baked into the
7+ # library at compile time. Passing -march=native to the ITK build does
8+ # NOT activate them; they must be requested explicitly via FFTW's own
9+ # CMake options (ENABLE_NEON, ENABLE_SSE, ENABLE_SSE2, ENABLE_AVX, ENABLE_AVX2).
710#
8- # The build configuration chosen to be
9- # generalizable to as many hardware platforms.
10- # Being backward compatible for decades
11- # old hardware is the goal of this internal
12- # representation.
11+ # This file detects appropriate defaults at cmake configure time:
1312#
14- # This is primarily used to support testing
15- # and should not be used for production
16- # builds where performance is a concern.
13+ # Native builds (CMAKE_CROSSCOMPILING is false):
14+ # - ARM64 (aarch64/arm64): NEON=ON (mandatory in ARMv8); x86 SIMD off.
15+ # - x86/x86_64: each of SSE, SSE2, AVX, AVX2 is probed individually
16+ # via __builtin_cpu_supports() / CheckCSourceRuns so that the
17+ # detected flags match the actual build-host CPU. A pre-AVX CPU
18+ # (e.g. Nehalem) gets SSE+SSE2 only; a Haswell or later gets all four.
19+ # - Other architectures: all SIMD off (conservative fallback).
20+ #
21+ # Cross-compiled builds (CMAKE_CROSSCOMPILING is true):
22+ # - ARM64: NEON=ON (mandatory); x86 SIMD off.
23+ # - x86_64: SSE+SSE2 only (baseline; AVX/AVX2 not assumed for target).
24+ # - Other: all SIMD off.
25+ #
26+ # Every flag is an individually overridable cache option, e.g.:
27+ # cmake -DFFTW_ENABLE_AVX2=OFF ...
1728#
1829# These instructions follow the guidance provided for modern cmake usage as described:
1930# https://github.com/dev-cafe/cmake-cookbook/blob/master/chapter-08/recipe-03/c-example/external/upstream/fftw3/CMakeLists.txt
@@ -53,6 +64,55 @@ if(NOT ITK_USE_SYSTEM_FFTW)
# Download location of the FFTW tarball, addressed by its sha512 content hash.
# The URL must not contain whitespace: "${_fftw_url_hash}/download" is a
# single path, so no space may separate the hash from "/download".
set(_fftw_url "https://data.kitware.com/api/v1/file/hashsum/sha512/${_fftw_url_hash}/download")

# Prefix inside the ITK build tree for the internally built FFTW
# (presumably the ExternalProject install destination -- confirm against the
# ExternalProject_Add calls further down).
set(FFTW_STAGED_INSTALL_PREFIX "${ITK_BINARY_DIR}/fftw")
67+
# ---------------------------------------------------------------------------
# FFTW SIMD defaults
#
# Computes an ON/OFF default for every FFTW SIMD codelet family and publishes
# each one as a user-overridable cache option:
#   FFTW_ENABLE_NEON, FFTW_ENABLE_SSE, FFTW_ENABLE_SSE2,
#   FFTW_ENABLE_AVX,  FFTW_ENABLE_AVX2
#
#   Native build    : ARM64  -> NEON.
#                     x86    -> each level probed on the build host with
#                               __builtin_cpu_supports() (GCC/Clang only; with
#                               any other compiler no probe runs and the x86
#                               defaults stay OFF).
#   Cross-compiling : ARM64  -> NEON.
#                     x86_64 -> SSE + SSE2 only.
#   Anything else   : all SIMD off.
#
# check_c_source_runs() caches each probe result, so the probes only execute
# on the first configure of a build tree. option() only applies its default
# when the cache entry does not exist yet, so an explicit
# -DFFTW_ENABLE_<X>=ON/OFF always wins.
# ---------------------------------------------------------------------------
include(CheckCSourceRuns)

# Conservative starting point: everything off until proven otherwise.
set(_fftw_default_neon OFF)
set(_fftw_default_sse OFF)
set(_fftw_default_sse2 OFF)
set(_fftw_default_avx OFF)
set(_fftw_default_avx2 OFF)

if(NOT CMAKE_CROSSCOMPILING)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
    # NEON is mandatory in ARMv8/AArch64, so no probe is needed.
    set(_fftw_default_neon ON)
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i686")
    # Probe each x86 SIMD level individually so the defaults match the CPU
    # that is running this configure step. A pre-AVX CPU (e.g. Nehalem) ends
    # up with SSE+SSE2 only, a pre-AVX2 CPU (e.g. Sandy Bridge / Ivy Bridge)
    # additionally gets AVX, and Haswell or later gets all four.
    # __builtin_cpu_supports is a GCC/Clang builtin, so other compilers are
    # skipped.
    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|AppleClang")
      foreach(_fftw_simd IN ITEMS sse sse2 avx avx2)
        # The feature name must be passed exactly ("sse", "avx2", ...):
        # surrounding whitespace inside the C string literal makes the
        # builtin reject it, the probe fails to compile, and the level would
        # silently stay OFF.
        # The probe program exits 0 (success) when the feature is present.
        check_c_source_runs(
          "int main(void){return __builtin_cpu_supports(\"${_fftw_simd}\")?0:1;}"
          _fftw_cpu_has_${_fftw_simd}
        )
        if(_fftw_cpu_has_${_fftw_simd})
          set(_fftw_default_${_fftw_simd} ON)
        endif()
      endforeach()
    endif()
  endif()
else()
  # Cross-compiling: the target CPU cannot be probed, so fall back to what
  # the architecture itself guarantees.
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
    set(_fftw_default_neon ON)
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
    # SSE/SSE2 are baseline on all 64-bit x86 CPUs; AVX/AVX2 are not assumed.
    set(_fftw_default_sse ON)
    set(_fftw_default_sse2 ON)
  endif()
endif()

# User-facing switches; their values are forwarded to FFTW's own
# ENABLE_<X> options by the ExternalProject_Add calls below.
option(FFTW_ENABLE_NEON "Enable FFTW NEON SIMD codelets (ARM64)" ${_fftw_default_neon})
option(FFTW_ENABLE_SSE "Enable FFTW SSE SIMD codelets (x86)" ${_fftw_default_sse})
option(FFTW_ENABLE_SSE2 "Enable FFTW SSE2 SIMD codelets (x86)" ${_fftw_default_sse2})
option(FFTW_ENABLE_AVX "Enable FFTW AVX SIMD codelets (x86)" ${_fftw_default_avx})
option(FFTW_ENABLE_AVX2 "Enable FFTW AVX2 SIMD codelets (x86)" ${_fftw_default_avx2})
115+
56116 set (PROJ_FFTWD_DEPENDS "" )
57117 if (ITK_USE_FFTWF)
58118 itk_download_attempt_check (FFTW )
@@ -76,14 +136,15 @@ if(NOT ITK_USE_SYSTEM_FFTW)
76136 -DCMAKE_INSTALL_LIBDIR:STRING=${CMAKE_INSTALL_LIBDIR}
77137 -DCMAKE_INSTALL_BINDIR:STRING=${CMAKE_INSTALL_BINDIR}
78138 -DDISABLE_FORTRAN:BOOL=ON
79- -DENABLE_AVX:BOOL=OFF
80- -DENABLE_AVX2:BOOL=OFF
139+ -DENABLE_AVX:BOOL=${FFTW_ENABLE_AVX}
140+ -DENABLE_AVX2:BOOL=${FFTW_ENABLE_AVX2}
81141 -DENABLE_FLOAT:BOOL=ON
82142 -DENABLE_LONG_DOUBLE:BOOL=OFF
143+ -DENABLE_NEON:BOOL=${FFTW_ENABLE_NEON}
83144 -DENABLE_OPENMP:BOOL=OFF
84145 -DENABLE_QUAD_PRECISION:BOOL=OFF
85- -DENABLE_SSE:BOOL=OFF
86- -DENABLE_SSE2:BOOL=OFF
146+ -DENABLE_SSE:BOOL=${FFTW_ENABLE_SSE}
147+ -DENABLE_SSE2:BOOL=${FFTW_ENABLE_SSE2}
87148 -DENABLE_THREADS:BOOL=ON
88149 -DCMAKE_APPLE_SILICON_PROCESSOR:STRING=${CMAKE_APPLE_SILICON_PROCESSOR}
89150 -DCMAKE_C_COMPILER_LAUNCHER:PATH=${CMAKE_C_COMPILER_LAUNCHER}
@@ -132,14 +193,15 @@ if(NOT ITK_USE_SYSTEM_FFTW)
132193 -DCMAKE_INSTALL_LIBDIR:STRING=${CMAKE_INSTALL_LIBDIR}
133194 -DCMAKE_INSTALL_BINDIR:STRING=${CMAKE_INSTALL_BINDIR}
134195 -DDISABLE_FORTRAN:BOOL=ON
135- -DENABLE_AVX:BOOL=OFF
136- -DENABLE_AVX2:BOOL=OFF
196+ -DENABLE_AVX:BOOL=${FFTW_ENABLE_AVX}
197+ -DENABLE_AVX2:BOOL=${FFTW_ENABLE_AVX2}
137198 -DENABLE_FLOAT:BOOL=OFF
138199 -DENABLE_LONG_DOUBLE:BOOL=OFF
200+ -DENABLE_NEON:BOOL=${FFTW_ENABLE_NEON}
139201 -DENABLE_OPENMP:BOOL=OFF
140202 -DENABLE_QUAD_PRECISION:BOOL=OFF
141- -DENABLE_SSE:BOOL=OFF
142- -DENABLE_SSE2:BOOL=OFF
203+ -DENABLE_SSE:BOOL=${FFTW_ENABLE_SSE}
204+ -DENABLE_SSE2:BOOL=${FFTW_ENABLE_SSE2}
143205 -DENABLE_THREADS:BOOL=ON
144206 -DCMAKE_APPLE_SILICON_PROCESSOR:STRING=${CMAKE_APPLE_SILICON_PROCESSOR}
145207 -DCMAKE_C_COMPILER_LAUNCHER:PATH=${CMAKE_C_COMPILER_LAUNCHER}
0 commit comments