mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 10:54:36 +00:00
cmake : pass CPU architecture flags to nvcc (#5146)
This commit is contained in:
parent
62fead3ea0
commit
bbe7c56c99
@ -466,17 +466,17 @@ function(get_flags CCID CCVER)
|
|||||||
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
|
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
|
||||||
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
|
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
|
||||||
)
|
)
|
||||||
set(C_FLAGS ${C_FLAGS} -Wdouble-promotion)
|
list(APPEND C_FLAGS -Wdouble-promotion)
|
||||||
endif()
|
endif()
|
||||||
elseif (CCID STREQUAL "GNU")
|
elseif (CCID STREQUAL "GNU")
|
||||||
set(C_FLAGS -Wdouble-promotion)
|
set(C_FLAGS -Wdouble-promotion)
|
||||||
set(CXX_FLAGS -Wno-array-bounds)
|
set(CXX_FLAGS -Wno-array-bounds)
|
||||||
|
|
||||||
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
|
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
|
||||||
set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation)
|
list(APPEND CXX_FLAGS -Wno-format-truncation)
|
||||||
endif()
|
endif()
|
||||||
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
|
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
|
||||||
set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi)
|
list(APPEND CXX_FLAGS -Wextra-semi)
|
||||||
endif()
|
endif()
|
||||||
elseif (CCID MATCHES "Intel")
|
elseif (CCID MATCHES "Intel")
|
||||||
# enable max optimization level when using Intel compiler
|
# enable max optimization level when using Intel compiler
|
||||||
@ -510,16 +510,18 @@ if (LLAMA_ALL_WARNINGS)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(CUDA_CXX_FLAGS "")
|
||||||
|
|
||||||
if (LLAMA_CUBLAS)
|
if (LLAMA_CUBLAS)
|
||||||
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
|
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic)
|
list(APPEND CUDA_FLAGS -Wno-pedantic)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
|
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
|
||||||
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
|
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
|
||||||
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
|
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
|
||||||
set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER})
|
list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
execute_process(
|
execute_process(
|
||||||
@ -547,13 +549,8 @@ if (LLAMA_CUBLAS)
|
|||||||
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
|
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
|
||||||
|
|
||||||
get_flags(${CUDA_CCID} ${CUDA_CCVER})
|
get_flags(${CUDA_CCID} ${CUDA_CCVER})
|
||||||
list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument
|
list(APPEND CUDA_CXX_FLAGS ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
|
||||||
if (NOT CUDA_CXX_FLAGS STREQUAL "")
|
|
||||||
set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
|
||||||
|
|
||||||
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
@ -618,12 +615,7 @@ if (NOT MSVC)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
function(add_compile_option_cpp ARG)
|
set(ARCH_FLAGS "")
|
||||||
# Adds a compile option to C/C++ only, but not for Cuda.
|
|
||||||
# Use, e.g., for CPU-architecture flags.
|
|
||||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${ARG}>)
|
|
||||||
add_compile_options($<$<COMPILE_LANGUAGE:C>:${ARG}>)
|
|
||||||
endfunction()
|
|
||||||
|
|
||||||
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
|
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
|
||||||
message(STATUS "ARM detected")
|
message(STATUS "ARM detected")
|
||||||
@ -636,19 +628,19 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
|
|||||||
else()
|
else()
|
||||||
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
||||||
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
||||||
add_compile_options(-mfp16-format=ieee)
|
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
|
||||||
endif()
|
endif()
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
||||||
# Raspberry Pi 1, Zero
|
# Raspberry Pi 1, Zero
|
||||||
add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access)
|
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
|
||||||
endif()
|
endif()
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
|
||||||
# Raspberry Pi 2
|
# Raspberry Pi 2
|
||||||
add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
|
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
|
||||||
endif()
|
endif()
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
|
||||||
# Raspberry Pi 3, 4, Zero 2 (32-bit)
|
# Raspberry Pi 3, 4, Zero 2 (32-bit)
|
||||||
add_compile_options(-mno-unaligned-access)
|
list(APPEND ARCH_FLAGS -mno-unaligned-access)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
|
||||||
@ -659,7 +651,7 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
|
|||||||
include(cmake/FindSIMD.cmake)
|
include(cmake/FindSIMD.cmake)
|
||||||
endif ()
|
endif ()
|
||||||
if (LLAMA_AVX512)
|
if (LLAMA_AVX512)
|
||||||
add_compile_option_cpp(/arch:AVX512)
|
list(APPEND ARCH_FLAGS /arch:AVX512)
|
||||||
# MSVC has no compile-time flags enabling specific
|
# MSVC has no compile-time flags enabling specific
|
||||||
# AVX512 extensions, neither it defines the
|
# AVX512 extensions, neither it defines the
|
||||||
# macros corresponding to the extensions.
|
# macros corresponding to the extensions.
|
||||||
@ -673,49 +665,61 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
|
|||||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
|
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
|
||||||
endif()
|
endif()
|
||||||
elseif (LLAMA_AVX2)
|
elseif (LLAMA_AVX2)
|
||||||
add_compile_option_cpp(/arch:AVX2)
|
list(APPEND ARCH_FLAGS /arch:AVX2)
|
||||||
elseif (LLAMA_AVX)
|
elseif (LLAMA_AVX)
|
||||||
add_compile_option_cpp(/arch:AVX)
|
list(APPEND ARCH_FLAGS /arch:AVX)
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
if (LLAMA_NATIVE)
|
if (LLAMA_NATIVE)
|
||||||
add_compile_option_cpp(-march=native)
|
list(APPEND ARCH_FLAGS -march=native)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_F16C)
|
if (LLAMA_F16C)
|
||||||
add_compile_option_cpp(-mf16c)
|
list(APPEND ARCH_FLAGS -mf16c)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_FMA)
|
if (LLAMA_FMA)
|
||||||
add_compile_option_cpp(-mfma)
|
list(APPEND ARCH_FLAGS -mfma)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_AVX)
|
if (LLAMA_AVX)
|
||||||
add_compile_option_cpp(-mavx)
|
list(APPEND ARCH_FLAGS -mavx)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_AVX2)
|
if (LLAMA_AVX2)
|
||||||
add_compile_option_cpp(-mavx2)
|
list(APPEND ARCH_FLAGS -mavx2)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_AVX512)
|
if (LLAMA_AVX512)
|
||||||
add_compile_option_cpp(-mavx512f)
|
list(APPEND ARCH_FLAGS -mavx512f)
|
||||||
add_compile_option_cpp(-mavx512bw)
|
list(APPEND ARCH_FLAGS -mavx512bw)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_AVX512_VBMI)
|
if (LLAMA_AVX512_VBMI)
|
||||||
add_compile_option_cpp(-mavx512vbmi)
|
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_AVX512_VNNI)
|
if (LLAMA_AVX512_VNNI)
|
||||||
add_compile_option_cpp(-mavx512vnni)
|
list(APPEND ARCH_FLAGS -mavx512vnni)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
||||||
message(STATUS "PowerPC detected")
|
message(STATUS "PowerPC detected")
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
||||||
add_compile_options(-mcpu=powerpc64le)
|
list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
|
||||||
else()
|
else()
|
||||||
add_compile_options(-mcpu=native -mtune=native)
|
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
|
||||||
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
|
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
message(STATUS "Unknown architecture")
|
message(STATUS "Unknown architecture")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
|
||||||
|
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
|
||||||
|
|
||||||
|
if (LLAMA_CUBLAS)
|
||||||
|
list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
|
||||||
|
list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
|
||||||
|
if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
|
||||||
|
list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
|
||||||
|
endif()
|
||||||
|
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
|
||||||
|
endif()
|
||||||
|
|
||||||
if (MINGW)
|
if (MINGW)
|
||||||
# Target Windows 8 for PrefetchVirtualMemory
|
# Target Windows 8 for PrefetchVirtualMemory
|
||||||
add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
|
add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
|
||||||
|
Loading…
Reference in New Issue
Block a user