diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index aa52cee64..05207afe5 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -1417,7 +1417,7 @@ class BitnetModel(Model): def weight_quant(self, weight): dtype = weight.dtype weight = weight.float() - s = 1 / weight.abs().mean().clamp(min=1e-5) + s = 1 / weight.abs().mean().clamp(min=1e-5) result = (weight * s).round().clamp(-1, 1) / s return result.type(dtype) diff --git a/ggml-quants.c b/ggml-quants.c index 6a825cd74..7deeb367f 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3802,6 +3802,10 @@ void ggml_vec_dot_i2_i8_s(int n, float * restrict s, size_t bs, const void * res UNUSED(by); UNUSED(nrc); +#if defined(__AVX2__) + // TODO +#else + int sumi = 0; for (int i = 0; i < n / 4; i++) { @@ -3812,7 +3816,7 @@ void ggml_vec_dot_i2_i8_s(int n, float * restrict s, size_t bs, const void * res sumi += (int)y[i*4+3] * weight[3]; } *s = (float)sumi; - +#endif } void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {