mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 18:34:36 +00:00
ggml : add run-time detection of neon, i8mm and sve (#9331)
* ggml: Added run-time detection of neon, i8mm and sve Adds run-time detection of the Arm instructions set features neon, i8mm and sve for Linux and Apple build targets. * ggml: Extend feature detection to include non aarch64 Arm arch * ggml: Move definition of ggml_arm_arch_features to the global data section
This commit is contained in:
parent
89f9944981
commit
6a0f779484
@ -2507,6 +2507,9 @@ extern "C" {
|
|||||||
GGML_API int ggml_cpu_has_cann (void);
|
GGML_API int ggml_cpu_has_cann (void);
|
||||||
GGML_API int ggml_cpu_has_llamafile (void);
|
GGML_API int ggml_cpu_has_llamafile (void);
|
||||||
|
|
||||||
|
// get the sve vector length in bytes
|
||||||
|
GGML_API int ggml_cpu_get_sve_cnt(void);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Internal types and functions exposed for tests and benchmarks
|
// Internal types and functions exposed for tests and benchmarks
|
||||||
//
|
//
|
||||||
|
@ -598,15 +598,6 @@ size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_
|
|||||||
return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
|
return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the number of byte lanes in the SVE vector if SVE is supported; otherwise, returns 0 if SVE is not supported.
|
|
||||||
static int sve_lane_count(void) {
|
|
||||||
#if defined(__ARM_FEATURE_SVE)
|
|
||||||
return ggml_sve_cnt_b;
|
|
||||||
#else
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, const void * restrict vy, int nr, int nc) {
|
void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, const void * restrict vy, int nr, int nc) {
|
||||||
const int qk = QK8_0;
|
const int qk = QK8_0;
|
||||||
const int nb = n / qk;
|
const int nb = n / qk;
|
||||||
@ -843,7 +834,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
|
|||||||
|
|
||||||
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
|
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
|
||||||
#if defined(__ARM_FEATURE_SVE)
|
#if defined(__ARM_FEATURE_SVE)
|
||||||
if (ggml_cpu_has_sve() && sve_lane_count() == QK8_0) {
|
if (ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == QK8_0) {
|
||||||
const void * b_ptr = vx;
|
const void * b_ptr = vx;
|
||||||
const void * a_ptr = vy;
|
const void * a_ptr = vy;
|
||||||
float * res_ptr = s;
|
float * res_ptr = s;
|
||||||
@ -2020,7 +2011,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
|
|||||||
|
|
||||||
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
|
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
|
||||||
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
|
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
|
||||||
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && sve_lane_count() == QK8_0) {
|
if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
|
||||||
const void * b_ptr = vx;
|
const void * b_ptr = vx;
|
||||||
const void * a_ptr = vy;
|
const void * a_ptr = vy;
|
||||||
float * res_ptr = s;
|
float * res_ptr = s;
|
||||||
|
@ -4013,7 +4013,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||||||
svfloat32_t sumv0 = svdup_n_f32(0.0f);
|
svfloat32_t sumv0 = svdup_n_f32(0.0f);
|
||||||
svfloat32_t sumv1 = svdup_n_f32(0.0f);
|
svfloat32_t sumv1 = svdup_n_f32(0.0f);
|
||||||
|
|
||||||
const int vector_length = ggml_sve_cnt_b*8;
|
const int vector_length = ggml_cpu_get_sve_cnt()*8;
|
||||||
|
|
||||||
// VLA Implementation using switch case
|
// VLA Implementation using switch case
|
||||||
switch (vector_length) {
|
switch (vector_length) {
|
||||||
@ -5597,7 +5597,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
|
|||||||
svfloat32_t sumv0 = svdup_n_f32(0.0f);
|
svfloat32_t sumv0 = svdup_n_f32(0.0f);
|
||||||
svfloat32_t sumv1 = svdup_n_f32(0.0f);
|
svfloat32_t sumv1 = svdup_n_f32(0.0f);
|
||||||
|
|
||||||
const int vector_length = ggml_sve_cnt_b*8;
|
const int vector_length = ggml_cpu_get_sve_cnt()*8;
|
||||||
|
|
||||||
//VLA Implemenation for SVE
|
//VLA Implemenation for SVE
|
||||||
switch (vector_length) {
|
switch (vector_length) {
|
||||||
|
@ -142,10 +142,6 @@ void iq2xs_free_impl(enum ggml_type type);
|
|||||||
void iq3xs_init_impl(int grid_size);
|
void iq3xs_init_impl(int grid_size);
|
||||||
void iq3xs_free_impl(int grid_size);
|
void iq3xs_free_impl(int grid_size);
|
||||||
|
|
||||||
#if defined(__ARM_FEATURE_SVE)
|
|
||||||
extern int ggml_sve_cnt_b;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
101
ggml/src/ggml.c
101
ggml/src/ggml.c
@ -39,9 +39,6 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__ARM_FEATURE_SVE)
|
|
||||||
int ggml_sve_cnt_b = 0;
|
|
||||||
#endif
|
|
||||||
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
||||||
#undef GGML_USE_LLAMAFILE
|
#undef GGML_USE_LLAMAFILE
|
||||||
#endif
|
#endif
|
||||||
@ -455,6 +452,15 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
|
|||||||
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
||||||
float ggml_table_f32_f16[1 << 16];
|
float ggml_table_f32_f16[1 << 16];
|
||||||
|
|
||||||
|
#if defined(__ARM_ARCH)
|
||||||
|
struct ggml_arm_arch_features_type {
|
||||||
|
int has_neon;
|
||||||
|
int has_i8mm;
|
||||||
|
int has_sve;
|
||||||
|
int sve_cnt;
|
||||||
|
} ggml_arm_arch_features = {-1, -1, -1, 0};
|
||||||
|
#endif
|
||||||
|
|
||||||
GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
|
GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
|
||||||
switch (status) {
|
switch (status) {
|
||||||
case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
|
case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
|
||||||
@ -3673,6 +3679,66 @@ static inline int ggml_up(int n, int m) {
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__ARM_ARCH)
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(__aarch64__)
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void ggml_init_arm_arch_features(void) {
|
||||||
|
#if defined(__linux__) && defined(__aarch64__)
|
||||||
|
uint32_t hwcap = getauxval(AT_HWCAP);
|
||||||
|
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
||||||
|
|
||||||
|
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
|
||||||
|
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
||||||
|
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
|
||||||
|
|
||||||
|
#if defined(__ARM_FEATURE_SVE)
|
||||||
|
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
||||||
|
#endif
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
int oldp = 0;
|
||||||
|
size_t size = sizeof(oldp);
|
||||||
|
if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
|
||||||
|
oldp = 0;
|
||||||
|
}
|
||||||
|
ggml_arm_arch_features.has_neon = oldp;
|
||||||
|
|
||||||
|
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
|
||||||
|
oldp = 0;
|
||||||
|
}
|
||||||
|
ggml_arm_arch_features.has_i8mm = oldp;
|
||||||
|
|
||||||
|
ggml_arm_arch_features.has_sve = 0;
|
||||||
|
ggml_arm_arch_features.sve_cnt = 0;
|
||||||
|
#else
|
||||||
|
// Run-time CPU feature detection not implemented for this platform, fallback to compile time
|
||||||
|
#if defined(__ARM_NEON)
|
||||||
|
ggml_arm_arch_features.has_neon = 1;
|
||||||
|
#else
|
||||||
|
ggml_arm_arch_features.has_neon = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
||||||
|
ggml_arm_arch_features.has_i8mm = 1;
|
||||||
|
#else
|
||||||
|
ggml_arm_arch_features.has_i8mm = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__ARM_FEATURE_SVE)
|
||||||
|
ggml_arm_arch_features.has_sve = 1;
|
||||||
|
ggml_arm_arch_features.sve_cnt = 16;
|
||||||
|
#else
|
||||||
|
ggml_arm_arch_features.has_sve = 0;
|
||||||
|
ggml_arm_arch_features.sve_cnt = 0;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
||||||
// make this function thread safe
|
// make this function thread safe
|
||||||
ggml_critical_section_start();
|
ggml_critical_section_start();
|
||||||
@ -3723,6 +3789,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||||||
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__ARM_ARCH)
|
||||||
|
ggml_init_arm_arch_features();
|
||||||
|
#endif
|
||||||
|
|
||||||
is_first_call = false;
|
is_first_call = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3771,12 +3841,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||||||
|
|
||||||
GGML_ASSERT_ALIGNED(ctx->mem_buffer);
|
GGML_ASSERT_ALIGNED(ctx->mem_buffer);
|
||||||
|
|
||||||
#if defined(__ARM_FEATURE_SVE)
|
|
||||||
if (!ggml_sve_cnt_b) {
|
|
||||||
ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
||||||
|
|
||||||
ggml_critical_section_end();
|
ggml_critical_section_end();
|
||||||
@ -23578,16 +23642,16 @@ int ggml_cpu_has_fma(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_neon(void) {
|
int ggml_cpu_has_neon(void) {
|
||||||
#if defined(__ARM_NEON)
|
#if defined(__ARM_ARCH)
|
||||||
return 1;
|
return ggml_arm_arch_features.has_neon;
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_sve(void) {
|
int ggml_cpu_has_sve(void) {
|
||||||
#if defined(__ARM_FEATURE_SVE)
|
#if defined(__ARM_ARCH)
|
||||||
return 1;
|
return ggml_arm_arch_features.has_sve;
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
@ -23734,11 +23798,18 @@ int ggml_cpu_has_vsx(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_matmul_int8(void) {
|
int ggml_cpu_has_matmul_int8(void) {
|
||||||
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
#if defined(__ARM_ARCH)
|
||||||
return 1;
|
return ggml_arm_arch_features.has_i8mm;
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ggml_cpu_get_sve_cnt(void) {
|
||||||
|
#if defined(__ARM_ARCH)
|
||||||
|
return ggml_arm_arch_features.sve_cnt;
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
Loading…
Reference in New Issue
Block a user