mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
Fix CI: ARM NEON, quantization unit tests, editorconfig (#1122)
This commit is contained in:
parent
5f939498d5
commit
c50b628810
16
ggml.c
16
ggml.c
@ -2635,15 +2635,15 @@ static void ggml_vec_dot_q4_1_q8_0(const int n, float * restrict s, const void *
|
|||||||
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), x0->d*y0->d);
|
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), x0->d*y0->d);
|
||||||
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), x1->d*y1->d);
|
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), x1->d*y1->d);
|
||||||
#else
|
#else
|
||||||
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0l), vget_low_s8 (v1_0ls));
|
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lz), vget_low_s8 (v1_0l));
|
||||||
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0l), vget_high_s8(v1_0ls));
|
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lz), vget_high_s8(v1_0l));
|
||||||
const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0h), vget_low_s8 (v1_0hs));
|
const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hz), vget_low_s8 (v1_0h));
|
||||||
const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0h), vget_high_s8(v1_0hs));
|
const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hz), vget_high_s8(v1_0h));
|
||||||
|
|
||||||
const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1l), vget_low_s8 (v1_1ls));
|
const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1lz), vget_low_s8 (v1_1l));
|
||||||
const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1l), vget_high_s8(v1_1ls));
|
const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1lz), vget_high_s8(v1_1l));
|
||||||
const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1h), vget_low_s8 (v1_1hs));
|
const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hz), vget_low_s8 (v1_1h));
|
||||||
const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1h), vget_high_s8(v1_1hs));
|
const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hz), vget_high_s8(v1_1h));
|
||||||
|
|
||||||
const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h));
|
const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h));
|
||||||
const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h));
|
const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h));
|
||||||
|
@ -2256,7 +2256,6 @@ std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_te
|
|||||||
|
|
||||||
// Returns the size of the state
|
// Returns the size of the state
|
||||||
size_t llama_get_state_size(struct llama_context * ctx) {
|
size_t llama_get_state_size(struct llama_context * ctx) {
|
||||||
const size_t s_bool = sizeof(int32_t);
|
|
||||||
// we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
|
// we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
|
||||||
// for reference, std::mt19937(1337) serializes to 6701 bytes.
|
// for reference, std::mt19937(1337) serializes to 6701 bytes.
|
||||||
const size_t s_rng_size = sizeof(size_t);
|
const size_t s_rng_size = sizeof(size_t);
|
||||||
|
@ -120,7 +120,7 @@ int main(int argc, char * argv[]) {
|
|||||||
ggml_type type = (ggml_type) i;
|
ggml_type type = (ggml_type) i;
|
||||||
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
|
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
|
||||||
|
|
||||||
if (qfns.quantize_row_q) {
|
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
||||||
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
||||||
failed = !(total_error < MAX_QUANTIZATION_TOTAL_ERROR);
|
failed = !(total_error < MAX_QUANTIZATION_TOTAL_ERROR);
|
||||||
num_failed += failed;
|
num_failed += failed;
|
||||||
|
@ -225,7 +225,7 @@ int main(int argc, char * argv[]) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (qfns.quantize_row_q) {
|
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
||||||
printf("%s\n", ggml_type_name(type));
|
printf("%s\n", ggml_type_name(type));
|
||||||
|
|
||||||
if (params.op_quantize_row_q_reference) {
|
if (params.op_quantize_row_q_reference) {
|
||||||
|
Loading…
Reference in New Issue
Block a user