mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
ggml-quants : provide ggml_vqtbl1q_u8 for 64bit compatibility (#5711)
* [ggml-quants] Provide ggml_vqtbl1q_u8 for 64bit compatibility vqtbl1q_u8 is not part of arm v7 neon library * [android-example] Remove abi filter after arm v7a fix * [github-workflows] Do not skip Android armeabi-v7a build
This commit is contained in:
parent
f1a98c5254
commit
abbabc5e51
3
.github/workflows/build.yml
vendored
3
.github/workflows/build.yml
vendored
@ -669,8 +669,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
cd examples/llama.android
|
cd examples/llama.android
|
||||||
|
|
||||||
# Skip armeabi-v7a for now (https://github.com/llvm/llvm-project/issues/65820).
|
./gradlew build --no-daemon
|
||||||
./gradlew build --no-daemon -Pskip-armeabi-v7a
|
|
||||||
|
|
||||||
# freeBSD-latest:
|
# freeBSD-latest:
|
||||||
# runs-on: macos-12
|
# runs-on: macos-12
|
||||||
|
@ -21,12 +21,8 @@ android {
|
|||||||
useSupportLibrary = true
|
useSupportLibrary = true
|
||||||
}
|
}
|
||||||
ndk {
|
ndk {
|
||||||
// Workaround for https://github.com/llvm/llvm-project/issues/65820
|
// Add NDK properties if wanted, e.g.
|
||||||
// affecting armeabi-v7a. Skip armeabi-v7a when invoked with
|
// abiFilters += listOf("arm64-v8a")
|
||||||
// -Pskip-armeabi-v7a (e.g., ./gradlew build -Pskip-armeabi-v7a).
|
|
||||||
if (project.hasProperty("skip-armeabi-v7a")) {
|
|
||||||
abiFilters += listOf("arm64-v8a", "x86_64", "x86")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
externalNativeBuild {
|
externalNativeBuild {
|
||||||
cmake {
|
cmake {
|
||||||
|
@ -462,6 +462,30 @@ inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: not tested
|
||||||
|
inline static int8x16_t ggml_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
|
||||||
|
int8x16_t res;
|
||||||
|
|
||||||
|
res[ 0] = a[b[ 0]];
|
||||||
|
res[ 1] = a[b[ 1]];
|
||||||
|
res[ 2] = a[b[ 2]];
|
||||||
|
res[ 3] = a[b[ 3]];
|
||||||
|
res[ 4] = a[b[ 4]];
|
||||||
|
res[ 5] = a[b[ 5]];
|
||||||
|
res[ 6] = a[b[ 6]];
|
||||||
|
res[ 7] = a[b[ 7]];
|
||||||
|
res[ 8] = a[b[ 8]];
|
||||||
|
res[ 9] = a[b[ 9]];
|
||||||
|
res[10] = a[b[10]];
|
||||||
|
res[11] = a[b[11]];
|
||||||
|
res[12] = a[b[12]];
|
||||||
|
res[13] = a[b[13]];
|
||||||
|
res[14] = a[b[14]];
|
||||||
|
res[15] = a[b[15]];
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define ggml_int16x8x2_t int16x8x2_t
|
#define ggml_int16x8x2_t int16x8x2_t
|
||||||
@ -476,6 +500,7 @@ inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
|
|||||||
#define ggml_vld1q_s8_x2 vld1q_s8_x2
|
#define ggml_vld1q_s8_x2 vld1q_s8_x2
|
||||||
#define ggml_vld1q_s8_x4 vld1q_s8_x4
|
#define ggml_vld1q_s8_x4 vld1q_s8_x4
|
||||||
#define ggml_vqtbl1q_s8 vqtbl1q_s8
|
#define ggml_vqtbl1q_s8 vqtbl1q_s8
|
||||||
|
#define ggml_vqtbl1q_u8 vqtbl1q_u8
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -9488,8 +9513,8 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const v
|
|||||||
qs += 16;
|
qs += 16;
|
||||||
|
|
||||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
||||||
vs.val[1] = vandq_u8(vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
||||||
vs.val[0] = vandq_u8(vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
||||||
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
||||||
vs.val[1] = vceqq_u8(vs.val[1], mask2);
|
vs.val[1] = vceqq_u8(vs.val[1], mask2);
|
||||||
|
|
||||||
@ -9497,8 +9522,8 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const v
|
|||||||
q3s.val[1] = vsubq_s8(vreinterpretq_s8_u8(veorq_u8(vs.val[1], vreinterpretq_u8_u32(aux32x4_1))), vreinterpretq_s8_u8(vs.val[1]));
|
q3s.val[1] = vsubq_s8(vreinterpretq_s8_u8(veorq_u8(vs.val[1], vreinterpretq_u8_u32(aux32x4_1))), vreinterpretq_s8_u8(vs.val[1]));
|
||||||
|
|
||||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16)));
|
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16)));
|
||||||
vs.val[1] = vandq_u8(vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
||||||
vs.val[0] = vandq_u8(vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
||||||
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
||||||
vs.val[1] = vceqq_u8(vs.val[1], mask2);
|
vs.val[1] = vceqq_u8(vs.val[1], mask2);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user