From 678e1389701109842b39ea1c3415ef85e212836b Mon Sep 17 00:00:00 2001 From: Stephan Walter Date: Sat, 8 Apr 2023 10:46:49 +0200 Subject: [PATCH] Update stats tool for unbounded's method --- examples/quantize-stats/quantize-stats.cpp | 16 ++++++++-------- ggml.c | 17 ++++++----------- ggml.h | 3 ++- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 6a2fe6116..051e1961c 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -17,7 +17,7 @@ static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" }; static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list"); -static const char * impl_strs[] = { "simd", "reference", "rmse" }; +static const char * impl_strs[] = { "simd", "reference", "rmse-sw", "rmse-unbounded" }; static_assert(sizeof(impl_strs) == GGML_QUANTIZE_IMPL_COUNT * sizeof(char *), "Incomplete implementation list"); struct quantize_stats_params { @@ -52,7 +52,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) { fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); fprintf(stderr, " -i, --implementation\n"); - fprintf(stderr, " select implementation (simd, reference, rmse)\n"); + fprintf(stderr, " select implementation (simd, reference, rmse-sw, rmse-unbounded)\n"); fprintf(stderr, " -v, --verbose\n"); fprintf(stderr, " verbose output (default: false)\n"); fprintf(stderr, " -p, --per-layer-stats\n"); @@ -111,7 +111,7 @@ void print_error_stats(const std::string & name, ggml_quantize_impl_t impl, cons double rmse = sqrt(stats.total_error / (double) stats.num_samples); double median = find_quantile(stats, .5); double pct95 = find_quantile(stats, .95); - printf("%-4s %-10s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n", + printf("%-4s %-15s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n", name.c_str(), impl_strs[impl], rmse, stats.max_error, pct95, median); if (print_histogram) { printf("Error distribution:\n"); @@ -321,12 +321,12 @@ int main(int argc, char ** argv) { continue; } quantize_fns_t qfns = ggml_internal_get_quantize_fn(type); - if (qfns.quantize_row_q && qfns.dequantize_row_q) { - for (int impl = 0; impl < GGML_QUANTIZE_IMPL_COUNT; impl++) { - if (!params.include_impl.empty() && std::find(params.include_impl.begin(), params.include_impl.end(), impl) == params.include_impl.end()) { - continue; - } + for (int impl = 0; impl < GGML_QUANTIZE_IMPL_COUNT; impl++) { + if (!params.include_impl.empty() && std::find(params.include_impl.begin(), params.include_impl.end(), impl) == params.include_impl.end()) { + continue; + } + if (qfns.quantize_row_q[impl] && qfns.dequantize_row_q) { if (params.verbose) { printf("testing %s %s ...\n", type_strs[type], impl_strs[impl]); } diff --git a/ggml.c b/ggml.c index 4171ad804..b301c9772 100644 --- a/ggml.c +++ b/ggml.c @@ -6790,27 +6790,22 @@ static void ggml_compute_forward_mul_mat_f16_f32( //} } -static void quantize_row_q_missing(const float * x, void * y, int k) { - (void)x; (void)y; (void)k; - assert(false); -} - static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = { [GGML_TYPE_Q4_0] = { .dequantize_row_q = dequantize_row_q4_0, .quantize_row_q = { - [GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_0, - [GGML_QUANTIZE_IMPL_REFERENCE] = (quantize_row_q_t)quantize_row_q4_0_reference, - [GGML_QUANTIZE_IMPL_RMSE] = (quantize_row_q_t)quantize_row_q4_0_rmse, + [GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_0, + [GGML_QUANTIZE_IMPL_REFERENCE] = (quantize_row_q_t)quantize_row_q4_0_reference, + [GGML_QUANTIZE_IMPL_RMSE_SW] = (quantize_row_q_t)quantize_row_q4_0_rmse, + [GGML_QUANTIZE_IMPL_RMSE_UNBOUNDED] = (quantize_row_q_t)quantize_row_q4_0_slow, }, .vec_dot_q = ggml_vec_dot_q4_0, }, [GGML_TYPE_Q4_1] = { .dequantize_row_q = dequantize_row_q4_1, .quantize_row_q = { - [GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_1, - [GGML_QUANTIZE_IMPL_REFERENCE] = quantize_row_q4_1_reference, - [GGML_QUANTIZE_IMPL_RMSE] = quantize_row_q_missing, + [GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_1, + [GGML_QUANTIZE_IMPL_REFERENCE] = quantize_row_q4_1_reference, }, .vec_dot_q = ggml_vec_dot_q4_1, }, diff --git a/ggml.h b/ggml.h index 9c28f781a..63c198a4f 100644 --- a/ggml.h +++ b/ggml.h @@ -795,7 +795,8 @@ typedef void (*vec_dot_q_t)(const int n, float * s, const void * x, const void * typedef enum { GGML_QUANTIZE_IMPL_SIMD, GGML_QUANTIZE_IMPL_REFERENCE, - GGML_QUANTIZE_IMPL_RMSE, + GGML_QUANTIZE_IMPL_RMSE_SW, + GGML_QUANTIZE_IMPL_RMSE_UNBOUNDED, GGML_QUANTIZE_IMPL_COUNT } ggml_quantize_impl_t;