Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2025-01-10 18:51:45 +00:00
Removed trailing whitespace, removed variable-length arrays, removed debug print
This commit is contained in:
parent 124b4172ef
commit 1e06f12714
ggml.c (4 changes)
@@ -16734,7 +16734,9 @@ size_t ggml_quantize_qx_0(const float * src, void * dst, int n, int64_t * hist,
 
 
     // Store the quantization pivots / points
-    float qvals[1 << qbits];
+    // IMPORTANT: Change qvals's size depending on the maximum qbits expected
+    GGML_ASSERT(qbits <= 8);
+    float qvals[1 << 8];
 
     for (int j = 0; j < (1 << qbits); j++) {
         qvals[j] = min_value + (mult_range * j);
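The ggml.c change above drops a C variable-length array in favor of a buffer sized for the worst case (qbits == 8) plus a GGML_ASSERT that guards it. Below is a minimal standalone sketch of that pattern, assuming the quantization points are evenly spaced as in the hunk; the function name nearest_quant_point, the MAX_QBITS constant, and the snapping logic are illustrative, not code from ggml.

#include <cassert>
#include <cstdio>

constexpr int MAX_QBITS = 8;   // assumed worst case the fixed buffer must cover

static float nearest_quant_point(float x, float min_value, float mult_range, int qbits) {
    assert(qbits <= MAX_QBITS);        // guard before touching the fixed-size buffer
    float qvals[1 << MAX_QBITS];       // sized for the maximum, instead of a VLA of 1 << qbits
    const int n = 1 << qbits;
    for (int j = 0; j < n; j++) {
        qvals[j] = min_value + (mult_range * j);   // evenly spaced quantization points
    }
    // snap x onto the closest point
    int best = 0;
    for (int j = 1; j < n; j++) {
        const float dj = x - qvals[j];
        const float db = x - qvals[best];
        if (dj * dj < db * db) {
            best = j;
        }
    }
    return qvals[best];
}

int main() {
    // 3-bit grid over [0, 1): points at 0.000, 0.125, ..., 0.875
    printf("%f\n", nearest_quant_point(0.37f, 0.0f, 0.125f, 3));   // prints 0.375000
    return 0;
}

The trade-off is a fixed 256-float (1 KiB) stack buffer regardless of qbits, which is what the IMPORTANT comment in the hunk warns about.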
llama.cpp (14 changes)
@@ -567,11 +567,13 @@ struct llama_file_loader {
             if (shard.type == GGML_TYPE_QX_0) {
                 shard.extra_data_file_off = file.tell();
 
-                uint64_t extra_data[shard.ne[1]];
-                file.read_raw(extra_data, sizeof(uint64_t) * shard.ne[1]);
+                // seek until before the last element of extra_data
+                file.seek(sizeof(uint64_t) * (shard.ne[1] - 1), SEEK_CUR);
 
-                // set the size of the tensor here
-                shard.size = extra_data[shard.ne[1] - 1];
+                // get the tensor's size from here
+                uint64_t tensor_size = 0;
+                file.read_raw(&tensor_size, sizeof(uint64_t));
+                shard.size = tensor_size;
 
                 // realign, just in case extra_data isn't a multiple of 32B
                 file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
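Instead of reading the whole extra_data table into a variable-length array, the loader now seeks past the first ne[1] - 1 entries, reads only the trailing uint64_t (the tensor size), and then realigns the file offset to a 32-byte boundary. A rough sketch of the same pattern is below, assuming a plain binary file of little-endian uint64_t values whose last entry holds the size; read_last_u64 and the std::ifstream usage are illustrative stand-ins, not the actual llama_file API.

#include <cstdint>
#include <cstdio>
#include <fstream>

static uint64_t read_last_u64(std::ifstream & f, std::streamoff n_entries) {
    // skip the first n_entries - 1 values instead of buffering the whole table
    f.seekg((n_entries - 1) * (std::streamoff) sizeof(uint64_t), std::ios::cur);

    uint64_t last = 0;
    f.read(reinterpret_cast<char *>(&last), sizeof last);

    // realign to a 32-byte boundary, mirroring (-pos) & 31 in the diff
    std::streamoff pos = f.tellg();
    f.seekg((-pos) & 31, std::ios::cur);
    return last;
}

int main() {
    // write a small table of 4 values, the last one acting as the "size"
    {
        std::ofstream out("extra_data.bin", std::ios::binary);
        uint64_t table[4] = {10, 20, 30, 4096};
        out.write(reinterpret_cast<const char *>(table), sizeof table);
    }
    std::ifstream in("extra_data.bin", std::ios::binary);
    printf("tensor size: %llu\n", (unsigned long long) read_last_u64(in, 4));
    return 0;
}

Note that (-pos) & 31 is the number of bytes needed to pad pos up to the next multiple of 32, and is 0 when pos is already aligned.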
@@ -1746,8 +1748,8 @@ static bool llama_eval_internal(
         lctx.n_p_eval += N;
     }
 
-    fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
-    fflush(stderr);
+    // fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
+    // fflush(stderr);
 
     return true;
 }
 
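The last hunk only comments out a per-eval debug print rather than deleting it. For reference, a self-contained sketch of the timing pattern it used is below: take a microsecond timestamp before the work and report the difference in milliseconds. std::chrono stands in for ggml's ggml_time_us() so the snippet builds on its own, and the busy loop is a placeholder for the model evaluation.

#include <chrono>
#include <cstdint>
#include <cstdio>

static int64_t time_us() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    const int64_t t_start_us = time_us();

    // stand-in for the model evaluation
    volatile double sink = 0.0;
    for (int i = 0; i < 1000000; i++) { sink = sink + i * 0.5; }

    fprintf(stderr, "\nmodel eval time: %lldms\n",
            (long long) ((time_us() - t_start_us) / 1000));
    fflush(stderr);
    return 0;
}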