diff --git a/ggml-quants.c b/ggml-quants.c index eef36e962..bb0556485 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -9651,7 +9651,7 @@ static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const u static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) { - const int gindex = iq2_data_index(256); + const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS); const uint64_t * kgrid_q2xs = iq2_data[gindex].grid; const int * kmap_q2xs = iq2_data[gindex].map; @@ -9824,7 +9824,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) { - const int gindex = iq2_data_index(512); + const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS); const uint64_t * kgrid_q2xs = iq2_data[gindex].grid; const int * kmap_q2xs = iq2_data[gindex].map; diff --git a/ggml.c b/ggml.c index ee5a60e18..c5b390711 100644 --- a/ggml.c +++ b/ggml.c @@ -19209,7 +19209,8 @@ size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * bool ggml_quantize_requires_imatrix(enum ggml_type type) { return type == GGML_TYPE_IQ2_XXS || - type == GGML_TYPE_IQ2_XS; + type == GGML_TYPE_IQ2_XS || + type == GGML_TYPE_IQ1_S; } size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start,