mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-03 23:34:35 +00:00
Update stats tool for unbounded's method
This commit is contained in:
parent
4dc62e78d8
commit
678e138970
@ -17,7 +17,7 @@
|
|||||||
static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" };
|
static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" };
|
||||||
static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
|
static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
|
||||||
|
|
||||||
static const char * impl_strs[] = { "simd", "reference", "rmse" };
|
static const char * impl_strs[] = { "simd", "reference", "rmse-sw", "rmse-unbounded" };
|
||||||
static_assert(sizeof(impl_strs) == GGML_QUANTIZE_IMPL_COUNT * sizeof(char *), "Incomplete implementation list");
|
static_assert(sizeof(impl_strs) == GGML_QUANTIZE_IMPL_COUNT * sizeof(char *), "Incomplete implementation list");
|
||||||
|
|
||||||
struct quantize_stats_params {
|
struct quantize_stats_params {
|
||||||
@ -52,7 +52,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
|
|||||||
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
||||||
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
||||||
fprintf(stderr, " -i, --implementation\n");
|
fprintf(stderr, " -i, --implementation\n");
|
||||||
fprintf(stderr, " select implementation (simd, reference, rmse)\n");
|
fprintf(stderr, " select implementation (simd, reference, rmse-sw, rmse-unbounded)\n");
|
||||||
fprintf(stderr, " -v, --verbose\n");
|
fprintf(stderr, " -v, --verbose\n");
|
||||||
fprintf(stderr, " verbose output (default: false)\n");
|
fprintf(stderr, " verbose output (default: false)\n");
|
||||||
fprintf(stderr, " -p, --per-layer-stats\n");
|
fprintf(stderr, " -p, --per-layer-stats\n");
|
||||||
@ -111,7 +111,7 @@ void print_error_stats(const std::string & name, ggml_quantize_impl_t impl, cons
|
|||||||
double rmse = sqrt(stats.total_error / (double) stats.num_samples);
|
double rmse = sqrt(stats.total_error / (double) stats.num_samples);
|
||||||
double median = find_quantile(stats, .5);
|
double median = find_quantile(stats, .5);
|
||||||
double pct95 = find_quantile(stats, .95);
|
double pct95 = find_quantile(stats, .95);
|
||||||
printf("%-4s %-10s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n",
|
printf("%-4s %-15s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n",
|
||||||
name.c_str(), impl_strs[impl], rmse, stats.max_error, pct95, median);
|
name.c_str(), impl_strs[impl], rmse, stats.max_error, pct95, median);
|
||||||
if (print_histogram) {
|
if (print_histogram) {
|
||||||
printf("Error distribution:\n");
|
printf("Error distribution:\n");
|
||||||
@ -321,12 +321,12 @@ int main(int argc, char ** argv) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
quantize_fns_t qfns = ggml_internal_get_quantize_fn(type);
|
quantize_fns_t qfns = ggml_internal_get_quantize_fn(type);
|
||||||
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
|
||||||
for (int impl = 0; impl < GGML_QUANTIZE_IMPL_COUNT; impl++) {
|
for (int impl = 0; impl < GGML_QUANTIZE_IMPL_COUNT; impl++) {
|
||||||
if (!params.include_impl.empty() && std::find(params.include_impl.begin(), params.include_impl.end(), impl) == params.include_impl.end()) {
|
if (!params.include_impl.empty() && std::find(params.include_impl.begin(), params.include_impl.end(), impl) == params.include_impl.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (qfns.quantize_row_q[impl] && qfns.dequantize_row_q) {
|
||||||
if (params.verbose) {
|
if (params.verbose) {
|
||||||
printf("testing %s %s ...\n", type_strs[type], impl_strs[impl]);
|
printf("testing %s %s ...\n", type_strs[type], impl_strs[impl]);
|
||||||
}
|
}
|
||||||
|
9
ggml.c
9
ggml.c
@ -6790,18 +6790,14 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|||||||
//}
|
//}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void quantize_row_q_missing(const float * x, void * y, int k) {
|
|
||||||
(void)x; (void)y; (void)k;
|
|
||||||
assert(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
||||||
[GGML_TYPE_Q4_0] = {
|
[GGML_TYPE_Q4_0] = {
|
||||||
.dequantize_row_q = dequantize_row_q4_0,
|
.dequantize_row_q = dequantize_row_q4_0,
|
||||||
.quantize_row_q = {
|
.quantize_row_q = {
|
||||||
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_0,
|
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_0,
|
||||||
[GGML_QUANTIZE_IMPL_REFERENCE] = (quantize_row_q_t)quantize_row_q4_0_reference,
|
[GGML_QUANTIZE_IMPL_REFERENCE] = (quantize_row_q_t)quantize_row_q4_0_reference,
|
||||||
[GGML_QUANTIZE_IMPL_RMSE] = (quantize_row_q_t)quantize_row_q4_0_rmse,
|
[GGML_QUANTIZE_IMPL_RMSE_SW] = (quantize_row_q_t)quantize_row_q4_0_rmse,
|
||||||
|
[GGML_QUANTIZE_IMPL_RMSE_UNBOUNDED] = (quantize_row_q_t)quantize_row_q4_0_slow,
|
||||||
},
|
},
|
||||||
.vec_dot_q = ggml_vec_dot_q4_0,
|
.vec_dot_q = ggml_vec_dot_q4_0,
|
||||||
},
|
},
|
||||||
@ -6810,7 +6806,6 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
|||||||
.quantize_row_q = {
|
.quantize_row_q = {
|
||||||
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_1,
|
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_1,
|
||||||
[GGML_QUANTIZE_IMPL_REFERENCE] = quantize_row_q4_1_reference,
|
[GGML_QUANTIZE_IMPL_REFERENCE] = quantize_row_q4_1_reference,
|
||||||
[GGML_QUANTIZE_IMPL_RMSE] = quantize_row_q_missing,
|
|
||||||
},
|
},
|
||||||
.vec_dot_q = ggml_vec_dot_q4_1,
|
.vec_dot_q = ggml_vec_dot_q4_1,
|
||||||
},
|
},
|
||||||
|
3
ggml.h
3
ggml.h
@ -795,7 +795,8 @@ typedef void (*vec_dot_q_t)(const int n, float * s, const void * x, const void *
|
|||||||
typedef enum {
|
typedef enum {
|
||||||
GGML_QUANTIZE_IMPL_SIMD,
|
GGML_QUANTIZE_IMPL_SIMD,
|
||||||
GGML_QUANTIZE_IMPL_REFERENCE,
|
GGML_QUANTIZE_IMPL_REFERENCE,
|
||||||
GGML_QUANTIZE_IMPL_RMSE,
|
GGML_QUANTIZE_IMPL_RMSE_SW,
|
||||||
|
GGML_QUANTIZE_IMPL_RMSE_UNBOUNDED,
|
||||||
GGML_QUANTIZE_IMPL_COUNT
|
GGML_QUANTIZE_IMPL_COUNT
|
||||||
} ggml_quantize_impl_t;
|
} ggml_quantize_impl_t;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user