I benchmarked the raw SSE/AVX intrinsics against Agner Fog's vector class library (my CPU supports both SSE and AVX instructions). With the GNU compiler everything behaved as expected. When I switched to MSVC, however, every vector class variant suddenly ran about 10 times slower (or worse) than the raw SSE/AVX intrinsics. My IDEs are VS2019 and CLion, and the GNU compiler version is 12.2. I use CMake to drive the build. The compiler configuration is as follows:
# Project bootstrap: required CMake version, project name, module search
# path, C++ standard, and the default vectorization mode.
cmake_minimum_required(VERSION 3.21)
project(testSIMD)

# Let find_package() pick up the find-modules shipped in the source tree.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/external_modules/cmakeFind/")

set(CMAKE_CXX_STANDARD 17)

# Toggle consulted by the GNU branch of the compiler-flag selection.
set(USE_VECTORIZE ON)

# VECTOR_TYPE falls back to "full_vectorize" when unset or false; the
# recognized values "none" and "default" are kept exactly as given.
if(NOT VECTOR_TYPE)
  set(VECTOR_TYPE full_vectorize)
elseif(VECTOR_TYPE STREQUAL "none")
  set(VECTOR_TYPE none)
elseif(VECTOR_TYPE STREQUAL "default")
  set(VECTOR_TYPE default)
endif()
# Start from the current C/C++ flags (self-assignment leaves the values as-is).
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# Find-module for the vector class library; presumably it defines
# VECTOR_C_FLAGS / VECTOR_CXX_FLAGS used below -- confirm in the module.
find_package(VectorFOG REQUIRED)

# Remember the flags as they were, then append the vector flags so the
# try_compile() probe below is built with them.
set(CMAKE_C_FLAGS_SAVE ${CMAKE_C_FLAGS})
set(CMAKE_CXX_FLAGS_SAVE ${CMAKE_CXX_FLAGS})
string(APPEND CMAKE_C_FLAGS " ${VECTOR_C_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS " ${VECTOR_CXX_FLAGS}")

# Probe: does the vector class test source compile with these flags?
try_compile(
  HAVE_FOG_VECTOR_CLASS
  "${CMAKE_BINARY_DIR}"
  "${CMAKE_SOURCE_DIR}/fogvectorclasstest.cpp"
)

if(NOT HAVE_FOG_VECTOR_CLASS)
  message(STATUS "Trying Fog Vector Class -- fails")
else()
  message(STATUS "Trying Fog Vector Class -- works")
  # Expose the result to the sources and select the vector size.
  add_definitions(-DHAVE_FOG_VECTOR_CLASS)
  string(APPEND CMAKE_CXX_FLAGS " -DMAX_VECTOR_SIZE=512")
  # Sources that depend on the vector class library.
  list(APPEND FILE_LIST
    kahan_fog_vector.cpp
    kahan_fog_vector8.cpp
  )
endif()
# Per-compiler optimization and vectorization flags.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # using GCC
  # USE_VECTORIZE switches between -ftree-vectorize (plus -fopenmp-simd)
  # and -fno-tree-vectorize; the remaining flags are common to both.
  if(USE_VECTORIZE)
    string(APPEND CMAKE_CXX_FLAGS " -g -O3 -fstrict-aliasing -fopenmp-simd -march=native -mtune=native -ffast-math -ftree-vectorize -fopt-info-vec-optimized")
  else()
    string(APPEND CMAKE_CXX_FLAGS " -g -O3 -fstrict-aliasing -march=native -mtune=native -ffast-math -fno-tree-vectorize -fopt-info-vec-optimized")
  endif()
  # Only request 512-bit vectors on compilers newer than 7.4.0.
  if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "7.4.0")
    string(APPEND CMAKE_CXX_FLAGS " -mprefer-vector-width=512")
  endif()
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # using Clang
  string(APPEND CMAKE_CXX_FLAGS " -g -O3 -fstrict-aliasing -fvectorize -march=native -mtune=native -ffast-math -Rpass-analysis=loop-vectorize")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
  # MSVC has no /O3: cl.exe ignores the unknown option with a warning, so
  # the previous "/O3" added no optimization at all. Without optimization
  # the vector class wrappers are not inlined and run far slower than raw
  # intrinsics. /O2 is the "maximize speed" level on MSVC.
  #
  # /O2 cannot be combined with /RTC1, which CMake's default MSVC Debug
  # flags enable, so it is applied to every configuration except Debug.
  # Benchmarks must therefore be built as Release or RelWithDebInfo.
  add_compile_options("$<$<NOT:$<CONFIG:Debug>>:/O2>")
  add_compile_options(/arch:AVX2)
  # Ask the auto-vectorizer to report which loops were (not) vectorized.
  string(APPEND CMAKE_CXX_FLAGS " /Qvec-report:2")
endif()
I am quite confused by this problem. If anyone knows why this happens, please let me know.