Skip to content

Commit 99f98ed

Browse files
committed
Use SSE4.1 on Windows
1 parent f774629 commit 99f98ed

4 files changed

Lines changed: 12 additions & 4 deletions

File tree

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ if(BUILD_TESTS)
106106
add_library(gui_testable_source SHARED ${JUCE_SOURCE})
107107
if(MSVC)
108108
set_property(TARGET gui_testable_source APPEND PROPERTY COMPILE_DEFINITIONS "JUCE_API=__declspec(dllexport)")
109-
target_compile_options(gui_testable_source PRIVATE /sdl- /nologo /MP /W0 /bigobj)
109+
# /arch:AVX makes MSVC define __AVX__, which SIMDConverter.h maps to __SSE4_1__ to select the SSE4.1 path (required for proper SIMD rounding in SIMDConverter)
110+
target_compile_options(gui_testable_source PRIVATE /sdl- /nologo /MP /W0 /bigobj /arch:AVX)
110111
endif()
111112
target_include_directories(gui_testable_source PRIVATE ${JUCE_DIRECTORY} ${JUCE_DIRECTORY}/modules)
112113
target_compile_features(gui_testable_source PUBLIC cxx_auto_type cxx_generalized_initializers cxx_std_17)
@@ -188,7 +189,8 @@ endif()
188189

189190
if(MSVC)
190191
set_property(TARGET open-ephys APPEND PROPERTY COMPILE_DEFINITIONS "JUCE_API=__declspec(dllexport)")
191-
target_compile_options(open-ephys PRIVATE /sdl- /nologo /MP /W0 /bigobj)
192+
# /arch:AVX makes MSVC define __AVX__, which SIMDConverter.h maps to __SSE4_1__ to select the SSE4.1 path (required for proper SIMD rounding in SIMDConverter)
193+
target_compile_options(open-ephys PRIVATE /sdl- /nologo /MP /W0 /bigobj /arch:AVX)
192194
target_link_libraries(open-ephys setupapi.lib opengl32.lib glu32.lib)
193195
set_property(TARGET open-ephys APPEND PROPERTY VS_DEBUGGER_COMMAND "Debug\\open-ephys.exe")
194196
set_property(TARGET open-ephys APPEND_STRING PROPERTY LINK_FLAGS_DEBUG " /NODEFAULTLIB:\"libcmt.lib\" /NODEFAULTLIB:\"msvcrt.lib\"")

Source/Processors/RecordNode/BinaryFormat/BinaryRecording.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ BinaryRecording::BinaryRecording()
4040
m_scaledBuffer.malloc (MAX_BUFFER_SIZE);
4141
m_intBuffer.malloc (MAX_BUFFER_SIZE);
4242
m_sampleNumberBuffer.malloc (MAX_BUFFER_SIZE);
43+
44+
LOGD ("BinaryRecording SIMD type: ", SIMDConverter::getSIMDTypeString());
4345
}
4446

4547
BinaryRecording::~BinaryRecording() {}

Source/Processors/RecordNode/BinaryFormat/SIMDConverter.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,7 @@ void SIMDConverter::convertSSE4_1 (const float* input, int16_t* output, float sc
273273
f0 = _mm_mul_ps (f0, vscale);
274274
f1 = _mm_mul_ps (f1, vscale);
275275

276-
// Round to nearest integer (SSE4.1)
277-
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC = 0x00 | 0x08 = 0x08
276+
// Round to nearest integer (SSE4.1 - uses banker's rounding)
278277
f0 = _mm_round_ps (f0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
279278
f1 = _mm_round_ps (f1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
280279

Source/Processors/RecordNode/BinaryFormat/SIMDConverter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
#ifndef SIMDCONVERTER_H
2525
#define SIMDCONVERTER_H
2626

27+
// MSVC with /arch:AVX doesn't define __SSE4_1__, but AVX implies SSE4.1 support
28+
#if defined(_MSC_VER) && defined(__AVX__) && !defined(__SSE4_1__)
29+
#define __SSE4_1__ 1
30+
#endif
31+
2732
#include <cstdint>
2833
#include <string>
2934

0 commit comments

Comments
 (0)