mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 19:04:35 +00:00
ggml : fix arm build (#10890)
* ggml: GGML_NATIVE uses -mcpu=native on ARM

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* ggml: Show detected features with GGML_NATIVE

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* remove msvc support, add GGML_CPU_ARM_ARCH option

* disable llamafile in android example

* march -> mcpu, skip adding feature macros

ggml-ci

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
Co-authored-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
0bf2d10c55
commit
9177484f58
@ -19,6 +19,7 @@ android {
|
|||||||
externalNativeBuild {
|
externalNativeBuild {
|
||||||
cmake {
|
cmake {
|
||||||
arguments += "-DLLAMA_BUILD_COMMON=ON"
|
arguments += "-DLLAMA_BUILD_COMMON=ON"
|
||||||
|
arguments += "-DGGML_LLAMAFILE=OFF"
|
||||||
arguments += "-DCMAKE_BUILD_TYPE=Release"
|
arguments += "-DCMAKE_BUILD_TYPE=Release"
|
||||||
cppFlags += listOf()
|
cppFlags += listOf()
|
||||||
arguments += listOf()
|
arguments += listOf()
|
||||||
|
@ -74,10 +74,10 @@ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
# general
|
# general
|
||||||
option(GGML_STATIC "ggml: static link libraries" OFF)
|
option(GGML_STATIC "ggml: static link libraries" OFF)
|
||||||
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
|
option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
|
||||||
option(GGML_LTO "ggml: enable link time optimization" OFF)
|
option(GGML_LTO "ggml: enable link time optimization" OFF)
|
||||||
option(GGML_CCACHE "ggml: use ccache if available" ON)
|
option(GGML_CCACHE "ggml: use ccache if available" ON)
|
||||||
|
|
||||||
# debug
|
# debug
|
||||||
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
|
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
|
||||||
@ -120,8 +120,9 @@ endif()
|
|||||||
option(GGML_LASX "ggml: enable lasx" ON)
|
option(GGML_LASX "ggml: enable lasx" ON)
|
||||||
option(GGML_LSX "ggml: enable lsx" ON)
|
option(GGML_LSX "ggml: enable lsx" ON)
|
||||||
option(GGML_RVV "ggml: enable rvv" ON)
|
option(GGML_RVV "ggml: enable rvv" ON)
|
||||||
option(GGML_SVE "ggml: enable SVE" OFF)
|
|
||||||
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
||||||
|
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
|
||||||
|
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
@ -74,112 +74,77 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|||||||
|
|
||||||
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
|
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
|
||||||
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
||||||
(NOT CMAKE_OSX_ARCHITECTURES AND
|
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
||||||
NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
||||||
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
||||||
|
|
||||||
message(STATUS "ARM detected")
|
message(STATUS "ARM detected")
|
||||||
|
|
||||||
if (MSVC)
|
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||||
list(APPEND ARCH_DEFINITIONS __aarch64__) # MSVC defines _M_ARM64 instead
|
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
|
||||||
list(APPEND ARCH_DEFINITIONS __ARM_NEON)
|
|
||||||
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FMA)
|
|
||||||
|
|
||||||
set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
|
|
||||||
string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
|
|
||||||
|
|
||||||
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
|
|
||||||
if (GGML_COMPILER_SUPPORT_DOTPROD)
|
|
||||||
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
|
|
||||||
|
|
||||||
message(STATUS "ARM feature DOTPROD enabled")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
|
|
||||||
|
|
||||||
if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
|
|
||||||
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
|
|
||||||
|
|
||||||
message(STATUS "ARM feature MATMUL_INT8 enabled")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
|
|
||||||
if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
|
|
||||||
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
|
|
||||||
|
|
||||||
message(STATUS "ARM feature FP16_VECTOR_ARITHMETIC enabled")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
|
|
||||||
elseif (APPLE)
|
|
||||||
if (GGML_NATIVE)
|
|
||||||
set(USER_PROVIDED_MARCH FALSE)
|
|
||||||
foreach(flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS)
|
|
||||||
if ("${${flag_var}}" MATCHES "-march=[a-zA-Z0-9+._-]+")
|
|
||||||
set(USER_PROVIDED_MARCH TRUE)
|
|
||||||
break()
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
if (NOT USER_PROVIDED_MARCH)
|
|
||||||
set(MARCH_FLAGS "-march=armv8.2a")
|
|
||||||
|
|
||||||
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
|
|
||||||
if (GGML_COMPILER_SUPPORT_DOTPROD)
|
|
||||||
set(MARCH_FLAGS "${MARCH_FLAGS}+dotprod")
|
|
||||||
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
|
|
||||||
|
|
||||||
message(STATUS "ARM feature DOTPROD enabled")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
set(TEST_I8MM_FLAGS "-march=armv8.2a+i8mm")
|
|
||||||
|
|
||||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
|
||||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS}")
|
|
||||||
|
|
||||||
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
|
|
||||||
if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
|
|
||||||
set(MARCH_FLAGS "${MARCH_FLAGS}+i8mm")
|
|
||||||
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
|
|
||||||
|
|
||||||
message(STATUS "ARM feature MATMUL_INT8 enabled")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
|
||||||
|
|
||||||
list(APPEND ARCH_FLAGS "${MARCH_FLAGS}")
|
|
||||||
endif ()
|
|
||||||
endif ()
|
|
||||||
else()
|
else()
|
||||||
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
||||||
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
||||||
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
|
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
|
||||||
endif()
|
endif()
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
|
||||||
# Raspberry Pi 1, Zero
|
if (GGML_NATIVE)
|
||||||
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
|
list(APPEND ARCH_FLAGS -mcpu=native)
|
||||||
endif()
|
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
|
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||||
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
|
|
||||||
# Android armeabi-v7a
|
# -mcpu=native does not always enable all the features in some compilers,
|
||||||
list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
|
# so we check for them manually and enable them if available
|
||||||
else()
|
|
||||||
# Raspberry Pi 2
|
include(CheckCXXSourceRuns)
|
||||||
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
|
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+dotprod")
|
||||||
|
check_cxx_source_runs(
|
||||||
|
"#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
|
||||||
|
GGML_COMPILER_SUPPORT_DOTPROD)
|
||||||
|
if (GGML_COMPILER_SUPPORT_DOTPROD)
|
||||||
|
set(ARCH_FLAGS "${ARCH_FLAGS}+dotprod")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+i8mm")
|
||||||
|
check_cxx_source_runs(
|
||||||
|
"#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
|
||||||
|
GGML_COMPILER_SUPPORT_I8MM)
|
||||||
|
if (GGML_COMPILER_SUPPORT_I8MM)
|
||||||
|
set(ARCH_FLAGS "${ARCH_FLAGS}+i8mm")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
||||||
|
|
||||||
|
else()
|
||||||
|
if (GGML_CPU_ARM_ARCH)
|
||||||
|
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
|
|
||||||
# Android arm64-v8a
|
# show enabled features
|
||||||
# Raspberry Pi 3, 4, Zero 2 (32-bit)
|
execute_process(
|
||||||
list(APPEND ARCH_FLAGS -mno-unaligned-access)
|
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
|
||||||
endif()
|
INPUT_FILE "/dev/null"
|
||||||
if (GGML_SVE)
|
OUTPUT_VARIABLE ARM_FEATURE
|
||||||
list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
|
RESULT_VARIABLE ARM_FEATURE_RESULT
|
||||||
|
)
|
||||||
|
if (ARM_FEATURE_RESULT)
|
||||||
|
message(FATAL_ERROR "Failed to get ARM features")
|
||||||
|
else()
|
||||||
|
foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
|
||||||
|
string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
|
||||||
|
if (NOT ${feature_pos} EQUAL -1)
|
||||||
|
message(STATUS "ARM feature ${feature} enabled")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
|
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
|
||||||
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
||||||
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
|
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
|
||||||
|
|
||||||
|
message(STATUS "x86 detected")
|
||||||
|
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
# instruction set detection for MSVC only
|
# instruction set detection for MSVC only
|
||||||
if (GGML_NATIVE)
|
if (GGML_NATIVE)
|
||||||
|
@ -522,6 +522,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|||||||
if (ggml_cpu_has_sve()) {
|
if (ggml_cpu_has_sve()) {
|
||||||
features.push_back({ "SVE", "1" });
|
features.push_back({ "SVE", "1" });
|
||||||
}
|
}
|
||||||
|
if (ggml_cpu_has_dotprod()) {
|
||||||
|
features.push_back({ "DOTPROD", "1" });
|
||||||
|
}
|
||||||
|
if (ggml_cpu_has_matmul_int8()) {
|
||||||
|
features.push_back({ "MATMUL_INT8", "1" });
|
||||||
|
}
|
||||||
if (ggml_cpu_get_sve_cnt() > 0) {
|
if (ggml_cpu_get_sve_cnt() > 0) {
|
||||||
static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
|
static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
|
||||||
features.push_back({ "SVE_CNT", sve_cnt.c_str() });
|
features.push_back({ "SVE_CNT", sve_cnt.c_str() });
|
||||||
|
@ -204,6 +204,7 @@ template <> inline float32x4_t load(const float *p) {
|
|||||||
return vld1q_f32(p);
|
return vld1q_f32(p);
|
||||||
}
|
}
|
||||||
#if !defined(_MSC_VER)
|
#if !defined(_MSC_VER)
|
||||||
|
// FIXME: this should check for __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
|
||||||
template <> inline float16x8_t load(const ggml_fp16_t *p) {
|
template <> inline float16x8_t load(const ggml_fp16_t *p) {
|
||||||
return vld1q_f16((const float16_t *)p);
|
return vld1q_f16((const float16_t *)p);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user