ggml : testing GPU FP precision via quantized CPY

2024-12-27 03:44:35 +00:00 · 2023-12-30 13:22:57 +02:00 · 2023-12-30 13:22:57 +02:00 · f64e4f04e7
commit f64e4f04e7
parent 24a447e20a
3 changed files with 23 additions and 7 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -291,6 +291,10 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
            options = [MTLCompileOptions new];
            options.preprocessorMacros = @{ @"QK_K" : @(64) };
 #endif
            // disable fast math
            // NOTE: this seems to have no effect whatsoever
            //[options setFastMathEnabled:false];
            ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
        }
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1103,6 +1103,8 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
    }
 }
 #include <stdio.h>
 void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
    static const int qk = QK4_1;
@@ -1110,6 +1112,9 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
    const int nb = k / qk;
    printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
    printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);
        const float m = GGML_FP16_TO_FP32(x[i].m);
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -437,11 +437,12 @@ struct test_case {
            double err = nmse(f1.data(), f2.data(), f1.size());
            if (err > ud->max_err) {
                printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
-                //for (int i = 0; i < f1.size(); i++) {
+                printf("\n");
-                //    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
+                for (int i = 0; i < f1.size(); i++) {
-                //}
+                    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
-                //printf("\n");
+                }
-                //exit(1);
+                printf("\n");
                exit(1);
                ud->ok = false;
            }
            return true;
@@ -1459,8 +1460,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
    test_cases.emplace_back(new test_dup());
-    for (ggml_type type : all_types) {
+    //for (ggml_type type : all_types) {
-       test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
+    //   test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
    //}
    for (ggml_type type : { GGML_TYPE_Q4_1} ) {
        for (int i = 0; i < 2048; ++i) {
            test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {32, 1, 1, 1}));
        }
    }
    test_cases.emplace_back(new test_cont());