Skip to content

Commit 9b8cea6

Browse files
authored
Add -mbmi2 flag when -mavx2 is used in Makefiles (#992)
It appears that the `-mbmi2` flag can be used whenever `-mavx2` is used, based on the documentation and on local testing. Some Makefiles already did this, but not consistently. This PR adds the flag to the Makefiles where it was missing, and also to one BUILD file.
1 parent 40190fb commit 9b8cea6

2 files changed

Lines changed: 30 additions & 13 deletions

File tree

pybind_interface/avx2/CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,15 @@ if(WIN32)
2020
# Add /O2 to any configuration that is NOT Debug.
2121
# This prevents a conflict with /RTC1 in DEBUG builds.
2222
add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>)
23-
else()
23+
elseif(LINUX)
2424
add_compile_options(-mavx2 -mfma -O3 -flto=auto)
25+
execute_process(
26+
COMMAND bash --noprofile -c "grep -qs bmi2 /proc/cpuinfo"
27+
RESULT_VARIABLE _EXIT_CODE
28+
)
29+
if(_EXIT_CODE EQUAL 0)
30+
add_compile_options("-mbmi2")
31+
endif()
2532
endif()
2633

2734
if(APPLE)

tests/Makefile

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -13,33 +13,34 @@
1313
# limitations under the License.
1414

1515
# Determine the hardware features available in this CPU.
16-
HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true")
1716
HAVE_AVX2 := $(shell grep -qs avx2 /proc/cpuinfo && echo "true")
1817
HAVE_AVX512 := $(shell grep -qs avx512f /proc/cpuinfo && echo "true")
18+
HAVE_BMI2 := $(shell grep -qs bmi2 /proc/cpuinfo && echo "true")
19+
HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true")
1920

2021
# Default targets. Always built.
2122
BASIC_FILES := $(shell ls *.cc | egrep -v '_avx|_sse')
2223

2324
# Additional flags and targets for non-CUDA cases.
24-
SSE_FILES =
25-
AVX2_FILES =
26-
AVX512_FILES =
27-
ifneq (,$(HAVE_SSE))
28-
SSE_FLAGS ?= -msse4
29-
SSE_FILES := $(wildcard *_sse_test.cc)
30-
endif
3125
ifneq (,$(HAVE_AVX2))
3226
AVX2_FLAGS ?= -mavx2 -mfma
3327
AVX2_FILES := $(wildcard *_avx_test.cc)
3428
endif
3529
ifneq (,$(HAVE_AVX512))
36-
AVX512_FLAGS ?= -mavx512f -mbmi2
30+
AVX512_FLAGS ?= -mavx512f
3731
AVX512_FILES := $(wildcard *_avx512_test.cc)
3832
endif
33+
ifneq (,$(HAVE_BMI2))
34+
BMI2_FLAGS ?= -mbmi2
35+
endif
36+
ifneq (,$(HAVE_SSE))
37+
SSE_FLAGS ?= -msse4
38+
SSE_FILES := $(wildcard *_sse_test.cc)
39+
endif
3940

4041
CXX_FILES := $(BASIC_FILES) $(SSE_FILES) $(AVX2_FILES) $(AVX512_FILES)
4142
CXX_TARGETS := $(CXX_FILES:%.cc=%.x)
42-
CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS)
43+
CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) $(BMI2_FLAGS)
4344

4445
CUDA_FILES := $(wildcard *cuda_test.cu)
4546
CUDA_TARGETS := $(CUDA_FILES:%cuda_test.cu=%cuda_test.x)
@@ -125,8 +126,17 @@ clean:
125126
-rm -f ./*.x ./*.a ./*.so ./*.mod
126127
rm -rf $(GTEST_DIR)/build
127128

128-
LOCAL_VARS = HAVE_SSE HAVE_AVX2 HAVE_AVX512 SSE_FLAGS AVX2_FLAGS $\
129-
AVX512_FLAGS CXXFLAGS CXX_TARGETS TEST_FLAGS
129+
LOCAL_VARS = BASIC_FILES CXX_FILES CXX_TARGETS CXXFLAGS $\
130+
CUDA_FILES CUDA_TARGETS $\
131+
CUSTATEVEC_FILES CUSTATEVEC_FLAGS $\
132+
CUSTATEVECEX_FILES CUSTATEVECEX_FLAGS $\
133+
HAVE_AVX2 AVX2_FILES AVX2_FLAGS $\
134+
HAVE_AVX512 AVX512_FILES AVX512_FLAGS $\
135+
HAVE_BMI2 BMI2_FLAGS $\
136+
HAVE_SSE SSE_FILES SSE_FLAGS $\
137+
HIP_FILES HIP_TARGETS $\
138+
GMOCK_DIR GTEST_DIR $\
139+
TESTFLAGS
130140

131141
.PHONY: print-vars
132142
print-vars: ; @$(foreach n,$(sort $(LOCAL_VARS)),echo $n=$($n);)

0 commit comments

Comments
 (0)