cmake : pass CPU architecture flags to nvcc (#5146)

This commit is contained in:
Jared Van Bortel 2024-01-26 15:34:06 -05:00 committed by GitHub
parent 62fead3ea0
commit bbe7c56c99
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -466,17 +466,17 @@ function(get_flags CCID CCVER)
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
) )
set(C_FLAGS ${C_FLAGS} -Wdouble-promotion) list(APPEND C_FLAGS -Wdouble-promotion)
endif() endif()
elseif (CCID STREQUAL "GNU") elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion) set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds) set(CXX_FLAGS -Wno-array-bounds)
if (CCVER VERSION_GREATER_EQUAL 7.1.0) if (CCVER VERSION_GREATER_EQUAL 7.1.0)
set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation) list(APPEND CXX_FLAGS -Wno-format-truncation)
endif() endif()
if (CCVER VERSION_GREATER_EQUAL 8.1.0) if (CCVER VERSION_GREATER_EQUAL 8.1.0)
set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi) list(APPEND CXX_FLAGS -Wextra-semi)
endif() endif()
elseif (CCID MATCHES "Intel") elseif (CCID MATCHES "Intel")
# enable max optimization level when using Intel compiler # enable max optimization level when using Intel compiler
@ -510,16 +510,18 @@ if (LLAMA_ALL_WARNINGS)
endif() endif()
endif() endif()
set(CUDA_CXX_FLAGS "")
if (LLAMA_CUBLAS) if (LLAMA_CUBLAS)
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math) set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
if (NOT MSVC) if (NOT MSVC)
set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic) list(APPEND CUDA_FLAGS -Wno-pedantic)
endif() endif()
if (LLAMA_ALL_WARNINGS AND NOT MSVC) if (LLAMA_ALL_WARNINGS AND NOT MSVC)
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER}) list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
endif() endif()
execute_process( execute_process(
@ -547,13 +549,8 @@ if (LLAMA_CUBLAS)
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}") message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
get_flags(${CUDA_CCID} ${CUDA_CCVER}) get_flags(${CUDA_CCID} ${CUDA_CCVER})
list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument list(APPEND CUDA_CXX_FLAGS ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
if (NOT CUDA_CXX_FLAGS STREQUAL "")
set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
endif()
endif() endif()
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif() endif()
if (WIN32) if (WIN32)
@ -618,12 +615,7 @@ if (NOT MSVC)
endif() endif()
endif() endif()
function(add_compile_option_cpp ARG) set(ARCH_FLAGS "")
# Adds a compile option to C/C++ only, but not for Cuda.
# Use, e.g., for CPU-architecture flags.
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${ARG}>)
add_compile_options($<$<COMPILE_LANGUAGE:C>:${ARG}>)
endfunction()
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64")) if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
message(STATUS "ARM detected") message(STATUS "ARM detected")
@ -636,19 +628,19 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
else() else()
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
add_compile_options(-mfp16-format=ieee) list(APPEND ARCH_FLAGS -mfp16-format=ieee)
endif() endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
# Raspberry Pi 1, Zero # Raspberry Pi 1, Zero
add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access) list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
endif() endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
# Raspberry Pi 2 # Raspberry Pi 2
add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations) list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
endif() endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
# Raspberry Pi 3, 4, Zero 2 (32-bit) # Raspberry Pi 3, 4, Zero 2 (32-bit)
add_compile_options(-mno-unaligned-access) list(APPEND ARCH_FLAGS -mno-unaligned-access)
endif() endif()
endif() endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" ) elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
@ -659,7 +651,7 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
include(cmake/FindSIMD.cmake) include(cmake/FindSIMD.cmake)
endif () endif ()
if (LLAMA_AVX512) if (LLAMA_AVX512)
add_compile_option_cpp(/arch:AVX512) list(APPEND ARCH_FLAGS /arch:AVX512)
# MSVC has no compile-time flags enabling specific # MSVC has no compile-time flags enabling specific
# AVX512 extensions, neither it defines the # AVX512 extensions, neither it defines the
# macros corresponding to the extensions. # macros corresponding to the extensions.
@ -673,49 +665,61 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>) add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
endif() endif()
elseif (LLAMA_AVX2) elseif (LLAMA_AVX2)
add_compile_option_cpp(/arch:AVX2) list(APPEND ARCH_FLAGS /arch:AVX2)
elseif (LLAMA_AVX) elseif (LLAMA_AVX)
add_compile_option_cpp(/arch:AVX) list(APPEND ARCH_FLAGS /arch:AVX)
endif() endif()
else() else()
if (LLAMA_NATIVE) if (LLAMA_NATIVE)
add_compile_option_cpp(-march=native) list(APPEND ARCH_FLAGS -march=native)
endif() endif()
if (LLAMA_F16C) if (LLAMA_F16C)
add_compile_option_cpp(-mf16c) list(APPEND ARCH_FLAGS -mf16c)
endif() endif()
if (LLAMA_FMA) if (LLAMA_FMA)
add_compile_option_cpp(-mfma) list(APPEND ARCH_FLAGS -mfma)
endif() endif()
if (LLAMA_AVX) if (LLAMA_AVX)
add_compile_option_cpp(-mavx) list(APPEND ARCH_FLAGS -mavx)
endif() endif()
if (LLAMA_AVX2) if (LLAMA_AVX2)
add_compile_option_cpp(-mavx2) list(APPEND ARCH_FLAGS -mavx2)
endif() endif()
if (LLAMA_AVX512) if (LLAMA_AVX512)
add_compile_option_cpp(-mavx512f) list(APPEND ARCH_FLAGS -mavx512f)
add_compile_option_cpp(-mavx512bw) list(APPEND ARCH_FLAGS -mavx512bw)
endif() endif()
if (LLAMA_AVX512_VBMI) if (LLAMA_AVX512_VBMI)
add_compile_option_cpp(-mavx512vbmi) list(APPEND ARCH_FLAGS -mavx512vbmi)
endif() endif()
if (LLAMA_AVX512_VNNI) if (LLAMA_AVX512_VNNI)
add_compile_option_cpp(-mavx512vnni) list(APPEND ARCH_FLAGS -mavx512vnni)
endif() endif()
endif() endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
message(STATUS "PowerPC detected") message(STATUS "PowerPC detected")
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le") if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
add_compile_options(-mcpu=powerpc64le) list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
else() else()
add_compile_options(-mcpu=native -mtune=native) list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
endif() endif()
else() else()
message(STATUS "Unknown architecture") message(STATUS "Unknown architecture")
endif() endif()
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
if (LLAMA_CUBLAS)
list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
endif()
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif()
if (MINGW) if (MINGW)
# Target Windows 8 for PrefetchVirtualMemory # Target Windows 8 for PrefetchVirtualMemory
add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER}) add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})