|
13 | 13 | # limitations under the License. |
14 | 14 |
|
15 | 15 | # Determine the hardware features available in this CPU. |
16 | | -HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true") |
17 | 16 | HAVE_AVX2 := $(shell grep -qs avx2 /proc/cpuinfo && echo "true") |
18 | 17 | HAVE_AVX512 := $(shell grep -qs avx512f /proc/cpuinfo && echo "true") |
| 18 | +HAVE_BMI2 := $(shell grep -qs bmi2 /proc/cpuinfo && echo "true") |
| 19 | +HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true") |
19 | 20 |
|
20 | 21 | # Default targets. Always built. |
21 | 22 | BASIC_FILES := $(shell ls *.cc | egrep -v '_avx|_sse') |
22 | 23 |
|
23 | 24 | # Additional flags and targets for non-CUDA cases. |
24 | | -SSE_FILES = |
25 | | -AVX2_FILES = |
26 | | -AVX512_FILES = |
27 | | -ifneq (,$(HAVE_SSE)) |
28 | | - SSE_FLAGS ?= -msse4 |
29 | | - SSE_FILES := $(wildcard *_sse_test.cc) |
30 | | -endif |
31 | 25 | ifneq (,$(HAVE_AVX2)) |
32 | 26 | AVX2_FLAGS ?= -mavx2 -mfma |
33 | 27 | AVX2_FILES := $(wildcard *_avx_test.cc) |
34 | 28 | endif |
35 | 29 | ifneq (,$(HAVE_AVX512)) |
36 | | - AVX512_FLAGS ?= -mavx512f -mbmi2 |
| 30 | + AVX512_FLAGS ?= -mavx512f |
37 | 31 | AVX512_FILES := $(wildcard *_avx512_test.cc) |
38 | 32 | endif |
| 33 | +ifneq (,$(HAVE_BMI2)) |
| 34 | + BMI2_FLAGS ?= -mbmi2 |
| 35 | +endif |
| 36 | +ifneq (,$(HAVE_SSE)) |
| 37 | + SSE_FLAGS ?= -msse4 |
| 38 | + SSE_FILES := $(wildcard *_sse_test.cc) |
| 39 | +endif |
39 | 40 |
|
40 | 41 | CXX_FILES := $(BASIC_FILES) $(SSE_FILES) $(AVX2_FILES) $(AVX512_FILES) |
41 | 42 | CXX_TARGETS := $(CXX_FILES:%.cc=%.x) |
42 | | -CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) |
| 43 | +CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) $(BMI2_FLAGS) |
43 | 44 |
|
44 | 45 | CUDA_FILES := $(wildcard *cuda_test.cu) |
45 | 46 | CUDA_TARGETS := $(CUDA_FILES:%cuda_test.cu=%cuda_test.x) |
@@ -125,8 +126,17 @@ clean: |
125 | 126 | -rm -f ./*.x ./*.a ./*.so ./*.mod |
126 | 127 | rm -rf $(GTEST_DIR)/build |
127 | 128 |
|
128 | | -LOCAL_VARS = HAVE_SSE HAVE_AVX2 HAVE_AVX512 SSE_FLAGS AVX2_FLAGS $\ |
129 | | - AVX512_FLAGS CXXFLAGS CXX_TARGETS TEST_FLAGS |
| 129 | +LOCAL_VARS = BASIC_FILES CXX_FILES CXX_TARGETS CXXFLAGS $\ |
| 130 | + CUDA_FILES CUDA_TARGETS $\ |
| 131 | + CUSTATEVEC_FILES CUSTATEVEC_FLAGS $\ |
| 132 | + CUSTATEVECEX_FILES CUSTATEVECEX_FLAGS $\ |
| 133 | + HAVE_AVX2 AVX2_FILES AVX2_FLAGS $\ |
| 134 | + HAVE_AVX512 AVX512_FILES AVX512_FLAGS $\ |
| 135 | + HAVE_BMI2 BMI2_FLAGS $\ |
| 136 | + HAVE_SSE SSE_FILES SSE_FLAGS $\ |
| 137 | + HIP_FILES HIP_TARGETS $\ |
| 138 | + GMOCK_DIR GTEST_DIR $\ |
| 139 | + TESTFLAGS |
130 | 140 |
|
131 | 141 | .PHONY: print-vars |
132 | 142 | print-vars: ; @$(foreach n,$(sort $(LOCAL_VARS)),echo $n=$($n);) |
0 commit comments