mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
ggml, ci : Windows ARM runner and build fixes (#5979)
* windows arm ci * fix `error C2078: too many initializers` with ggml_vld1q_u32 macro for MSVC ARM64 * fix `warning C4146: unary minus operator applied to unsigned type, result still unsigned` * fix `error C2065: '__fp16': undeclared identifier`
This commit is contained in:
parent
332bdfd798
commit
3202361c5b
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
@ -425,6 +425,8 @@ jobs:
|
|||||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
|
||||||
- build: 'vulkan'
|
- build: 'vulkan'
|
||||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
|
||||||
|
- build: 'arm64'
|
||||||
|
defines: '-A ARM64 -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
@ -520,7 +522,7 @@ jobs:
|
|||||||
- name: Test
|
- name: Test
|
||||||
id: cmake_test
|
id: cmake_test
|
||||||
# not all machines have native AVX-512
|
# not all machines have native AVX-512
|
||||||
if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
|
if: ${{ matrix.build != 'arm64' && matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
|
||||||
run: |
|
run: |
|
||||||
cd build
|
cd build
|
||||||
ctest -L main -C Release --verbose --timeout 900
|
ctest -L main -C Release --verbose --timeout 900
|
||||||
|
@ -53,26 +53,30 @@ extern "C" {
|
|||||||
//
|
//
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
typedef __fp16 ggml_fp16_internal_t;
|
||||||
|
|
||||||
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||||
|
|
||||||
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
|
||||||
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||||
__fp16 tmp;
|
ggml_fp16_internal_t tmp;
|
||||||
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
|
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
|
||||||
return (float)tmp;
|
return (float)tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||||
ggml_fp16_t res;
|
ggml_fp16_t res;
|
||||||
__fp16 tmp = f;
|
ggml_fp16_internal_t tmp = f;
|
||||||
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
|
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
typedef uint16_t ggml_fp16_internal_t;
|
||||||
|
|
||||||
#ifdef __wasm_simd128__
|
#ifdef __wasm_simd128__
|
||||||
#include <wasm_simd128.h>
|
#include <wasm_simd128.h>
|
||||||
#else
|
#else
|
||||||
|
@ -9374,15 +9374,15 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void *
|
|||||||
|
|
||||||
const uint8x16_t idx_l = vld1q_u8(qs); qs += 16;
|
const uint8x16_t idx_l = vld1q_u8(qs); qs += 16;
|
||||||
idx.vec_index = vorrq_u16(vmovl_u8(vget_low_u8 (idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+0]), hshift), m256));
|
idx.vec_index = vorrq_u16(vmovl_u8(vget_low_u8 (idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+0]), hshift), m256));
|
||||||
const uint32x4_t aux32x4_0 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
||||||
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
|
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
|
||||||
const uint32x4_t aux32x4_1 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
||||||
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
|
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
|
||||||
idx.vec_index = vorrq_u16(vmovl_u8(vget_high_u8(idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+1]), hshift), m256));
|
idx.vec_index = vorrq_u16(vmovl_u8(vget_high_u8(idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+1]), hshift), m256));
|
||||||
const uint32x4_t aux32x4_2 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
||||||
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
|
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
|
||||||
const uint32x4_t aux32x4_3 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
||||||
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
|
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
|
||||||
|
|
||||||
|
|
||||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
||||||
|
4
ggml.c
4
ggml.c
@ -857,7 +857,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
|||||||
#define GGML_F16x8 float16x8_t
|
#define GGML_F16x8 float16x8_t
|
||||||
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
|
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
|
||||||
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
|
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
|
||||||
#define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
|
#define GGML_F16x8_LOAD(x) vld1q_f16((const ggml_fp16_internal_t *)(x))
|
||||||
#define GGML_F16x8_STORE vst1q_f16
|
#define GGML_F16x8_STORE vst1q_f16
|
||||||
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
|
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
|
||||||
#define GGML_F16x8_ADD vaddq_f16
|
#define GGML_F16x8_ADD vaddq_f16
|
||||||
@ -900,7 +900,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
|||||||
#define GGML_F32Cx4 float32x4_t
|
#define GGML_F32Cx4 float32x4_t
|
||||||
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
|
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
|
||||||
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
|
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
|
||||||
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
|
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const ggml_fp16_internal_t *)(x)))
|
||||||
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
|
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
|
||||||
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
|
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
|
||||||
#define GGML_F32Cx4_ADD vaddq_f32
|
#define GGML_F32Cx4_ADD vaddq_f32
|
||||||
|
@ -13980,7 +13980,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
|
|||||||
} else if (llama_is_user_defined_token(model->vocab, token)) {
|
} else if (llama_is_user_defined_token(model->vocab, token)) {
|
||||||
std::string result = model->vocab.id_to_token[token].text;
|
std::string result = model->vocab.id_to_token[token].text;
|
||||||
if (length < (int) result.length()) {
|
if (length < (int) result.length()) {
|
||||||
return -result.length();
|
return -(int) result.length();
|
||||||
}
|
}
|
||||||
memcpy(buf, result.c_str(), result.length());
|
memcpy(buf, result.c_str(), result.length());
|
||||||
return result.length();
|
return result.length();
|
||||||
@ -14015,7 +14015,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
|
|||||||
} else if (llama_is_user_defined_token(model->vocab, token)) {
|
} else if (llama_is_user_defined_token(model->vocab, token)) {
|
||||||
std::string result = model->vocab.id_to_token[token].text;
|
std::string result = model->vocab.id_to_token[token].text;
|
||||||
if (length < (int) result.length()) {
|
if (length < (int) result.length()) {
|
||||||
return -result.length();
|
return -(int) result.length();
|
||||||
}
|
}
|
||||||
memcpy(buf, result.c_str(), result.length());
|
memcpy(buf, result.c_str(), result.length());
|
||||||
return result.length();
|
return result.length();
|
||||||
|
Loading…
Reference in New Issue
Block a user