From abd798d70f2a53ecf47a883b6da2ab030462e281 Mon Sep 17 00:00:00 2001 From: Eddie-Wang Date: Mon, 10 Jun 2024 02:50:14 +0000 Subject: [PATCH] fix code --- ggml-quants.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----- ggml.c | 1 - 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 273cdee70..72149d4a0 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -664,13 +664,13 @@ void quantize_row_i8_s(const float * x, void * y, int64_t n, float* act_scales) double min = 0.00001; double max = min; for (int i = 0; i < n; ++i) { - max = MAX(max, (double)fabs(x[i])); + max = MAX(max, (double)fabs((double)x[i])); } float s = 127 / max; act_scales[0] = s; float temp; for (int i = 0; i < n; ++i) { - temp = round(x[i] * s); + temp = round((double)(x[i] * s)); if (temp > 127) temp = 127; if (temp < -128) temp = -128; dst[i] = (int8_t)(temp); @@ -3335,14 +3335,14 @@ size_t quantize_i2_s(const float * restrict src, void * restrict dst, int64_t nr // f32 -> q8 double i2_scale = 0; for (int i=0; i 1e-6) { - i2_scale = src[i]; + if (fabs((double)(src[i])) > 1e-6) { + i2_scale = (double)src[i]; } } uint8_t* q8 = (uint8_t*)dst; for (int i=0; i