ggml : testing GPU FP precision via quantized CPY

This commit is contained in:
Georgi Gerganov 2023-12-30 13:22:57 +02:00
parent 24a447e20a
commit f64e4f04e7
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
3 changed files with 23 additions and 7 deletions

View File

@@ -291,6 +291,10 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
options = [MTLCompileOptions new]; options = [MTLCompileOptions new];
options.preprocessorMacros = @{ @"QK_K" : @(64) }; options.preprocessorMacros = @{ @"QK_K" : @(64) };
#endif #endif
// disable fast math
// NOTE: this seems to have no effect whatsoever
//[options setFastMathEnabled:false];
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error]; ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
} }

View File

@@ -1103,6 +1103,8 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
} }
} }
#include <stdio.h>
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) { void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
static const int qk = QK4_1; static const int qk = QK4_1;
@@ -1110,6 +1112,9 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
const int nb = k / qk; const int nb = k / qk;
printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
for (int i = 0; i < nb; i++) { for (int i = 0; i < nb; i++) {
const float d = GGML_FP16_TO_FP32(x[i].d); const float d = GGML_FP16_TO_FP32(x[i].d);
const float m = GGML_FP16_TO_FP32(x[i].m); const float m = GGML_FP16_TO_FP32(x[i].m);

View File

@@ -437,11 +437,12 @@ struct test_case {
double err = nmse(f1.data(), f2.data(), f1.size()); double err = nmse(f1.data(), f2.data(), f1.size());
if (err > ud->max_err) { if (err > ud->max_err) {
printf("[%s] NMSE = %f ", ggml_op_desc(t1), err); printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
//for (int i = 0; i < f1.size(); i++) { printf("\n");
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]); for (int i = 0; i < f1.size(); i++) {
//} printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
//printf("\n"); }
//exit(1); printf("\n");
exit(1);
ud->ok = false; ud->ok = false;
} }
return true; return true;
@@ -1459,8 +1460,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
test_cases.emplace_back(new test_dup()); test_cases.emplace_back(new test_dup());
for (ggml_type type : all_types) { //for (ggml_type type : all_types) {
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1})); // test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
//}
for (ggml_type type : { GGML_TYPE_Q4_1} ) {
for (int i = 0; i < 2048; ++i) {
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {32, 1, 1, 1}));
}
} }
test_cases.emplace_back(new test_cont()); test_cases.emplace_back(new test_cont());