mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 03:44:35 +00:00
ggml : testing GPU FP precision via quantized CPY
This commit is contained in:
parent
24a447e20a
commit
f64e4f04e7
@ -291,6 +291,10 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
options = [MTLCompileOptions new];
|
options = [MTLCompileOptions new];
|
||||||
options.preprocessorMacros = @{ @"QK_K" : @(64) };
|
options.preprocessorMacros = @{ @"QK_K" : @(64) };
|
||||||
#endif
|
#endif
|
||||||
|
// disable fast math
|
||||||
|
// NOTE: this seems to have no effect whatsoever
|
||||||
|
//[options setFastMathEnabled:false];
|
||||||
|
|
||||||
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
|
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1103,6 +1103,8 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
|
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
|
||||||
static const int qk = QK4_1;
|
static const int qk = QK4_1;
|
||||||
|
|
||||||
@ -1110,6 +1112,9 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
|
|||||||
|
|
||||||
const int nb = k / qk;
|
const int nb = k / qk;
|
||||||
|
|
||||||
|
printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
|
||||||
|
printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
|
||||||
|
|
||||||
for (int i = 0; i < nb; i++) {
|
for (int i = 0; i < nb; i++) {
|
||||||
const float d = GGML_FP16_TO_FP32(x[i].d);
|
const float d = GGML_FP16_TO_FP32(x[i].d);
|
||||||
const float m = GGML_FP16_TO_FP32(x[i].m);
|
const float m = GGML_FP16_TO_FP32(x[i].m);
|
||||||
|
@ -437,11 +437,12 @@ struct test_case {
|
|||||||
double err = nmse(f1.data(), f2.data(), f1.size());
|
double err = nmse(f1.data(), f2.data(), f1.size());
|
||||||
if (err > ud->max_err) {
|
if (err > ud->max_err) {
|
||||||
printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
|
printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
|
||||||
//for (int i = 0; i < f1.size(); i++) {
|
printf("\n");
|
||||||
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
for (int i = 0; i < f1.size(); i++) {
|
||||||
//}
|
printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
||||||
//printf("\n");
|
}
|
||||||
//exit(1);
|
printf("\n");
|
||||||
|
exit(1);
|
||||||
ud->ok = false;
|
ud->ok = false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -1459,8 +1460,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|||||||
|
|
||||||
test_cases.emplace_back(new test_dup());
|
test_cases.emplace_back(new test_dup());
|
||||||
|
|
||||||
for (ggml_type type : all_types) {
|
//for (ggml_type type : all_types) {
|
||||||
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
|
// test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
|
||||||
|
//}
|
||||||
|
|
||||||
|
for (ggml_type type : { GGML_TYPE_Q4_1} ) {
|
||||||
|
for (int i = 0; i < 2048; ++i) {
|
||||||
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {32, 1, 1, 1}));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test_cases.emplace_back(new test_cont());
|
test_cases.emplace_back(new test_cont());
|
||||||
|
Loading…
Reference in New Issue
Block a user