From f64e4f04e7608f9fefe7c48a9abf01a8713e9e51 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sat, 30 Dec 2023 13:22:57 +0200
Subject: [PATCH] ggml : testing GPU FP precision via quantized CPY

---
 ggml-metal.m               |  4 ++++
 ggml-quants.c              |  5 +++++
 tests/test-backend-ops.cpp | 21 ++++++++++++++-------
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index 51a72ae33..50afa8ea9 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -291,6 +291,10 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
             options = [MTLCompileOptions new];
             options.preprocessorMacros = @{ @"QK_K" : @(64) };
 #endif
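+            // experiment: disable Metal fast math (the call below is intentionally left commented out)
+            // NOTE: enabling it seemed to have no effect - possibly because `options` is nil unless the #if branch above is compiled in (TODO: confirm)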
+            //[options setFastMathEnabled:false];
+
             ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
         }
 
diff --git a/ggml-quants.c b/ggml-quants.c
index 05ef8f9b7..66426bd52 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1103,6 +1103,8 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
     }
 }
 
+#include <stdio.h>
+
 void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
     static const int qk = QK4_1;
 
@@ -1110,6 +1112,9 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
 
     const int nb = k / qk;
 
+    printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
+    printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
+
     for (int i = 0; i < nb; i++) {
         const float d = GGML_FP16_TO_FP32(x[i].d);
         const float m = GGML_FP16_TO_FP32(x[i].m);
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index b115299c0..05eed25b5 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -437,11 +437,12 @@ struct test_case {
             double err = nmse(f1.data(), f2.data(), f1.size());
             if (err > ud->max_err) {
                 printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
-                //for (int i = 0; i < f1.size(); i++) {
-                //    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
-                //}
-                //printf("\n");
-                //exit(1);
+                printf("\n");
+                for (int i = 0; i < f1.size(); i++) {
+                    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
+                }
+                printf("\n");
+                exit(1);
                 ud->ok = false;
             }
             return true;
@@ -1459,8 +1460,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
 
     test_cases.emplace_back(new test_dup());
 
-    for (ggml_type type : all_types) {
-       test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
+    //for (ggml_type type : all_types) {
+    //   test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
+    //}
+
+    for (ggml_type type : { GGML_TYPE_Q4_1} ) {
+        for (int i = 0; i < 2048; ++i) {
+            test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {32, 1, 1, 1}));
+        }
     }
 
     test_cases.emplace_back(new test_cont());