mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 03:14:35 +00:00
ggml : testing GPU FP precision via quantized CPY
This commit is contained in:
parent
24a447e20a
commit
f64e4f04e7
@ -291,6 +291,10 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
||||
options = [MTLCompileOptions new];
|
||||
options.preprocessorMacros = @{ @"QK_K" : @(64) };
|
||||
#endif
|
||||
// disable fast math
|
||||
// NOTE: this seems to have no effect whatsoever
|
||||
//[options setFastMathEnabled:false];
|
||||
|
||||
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
|
||||
}
|
||||
|
||||
|
@ -1103,6 +1103,8 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
|
||||
}
|
||||
}
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
|
||||
static const int qk = QK4_1;
|
||||
|
||||
@ -1110,6 +1112,9 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
|
||||
|
||||
const int nb = k / qk;
|
||||
|
||||
printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
|
||||
printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
|
||||
|
||||
for (int i = 0; i < nb; i++) {
|
||||
const float d = GGML_FP16_TO_FP32(x[i].d);
|
||||
const float m = GGML_FP16_TO_FP32(x[i].m);
|
||||
|
@ -437,11 +437,12 @@ struct test_case {
|
||||
double err = nmse(f1.data(), f2.data(), f1.size());
|
||||
if (err > ud->max_err) {
|
||||
printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
|
||||
//for (int i = 0; i < f1.size(); i++) {
|
||||
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
||||
//}
|
||||
//printf("\n");
|
||||
//exit(1);
|
||||
printf("\n");
|
||||
for (int i = 0; i < f1.size(); i++) {
|
||||
printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
||||
}
|
||||
printf("\n");
|
||||
exit(1);
|
||||
ud->ok = false;
|
||||
}
|
||||
return true;
|
||||
@ -1459,8 +1460,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
||||
|
||||
test_cases.emplace_back(new test_dup());
|
||||
|
||||
for (ggml_type type : all_types) {
|
||||
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
|
||||
//for (ggml_type type : all_types) {
|
||||
// test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 10, 10, 1}));
|
||||
//}
|
||||
|
||||
for (ggml_type type : { GGML_TYPE_Q4_1} ) {
|
||||
for (int i = 0; i < 2048; ++i) {
|
||||
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {32, 1, 1, 1}));
|
||||
}
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_cont());
|
||||
|
Loading…
Reference in New Issue
Block a user