Removed trailing whitespace, removed variable-length arrays, removed debug print

This commit is contained in:
Amy 2023-06-13 10:39:04 +01:00
parent 124b4172ef
commit 1e06f12714
2 changed files with 44 additions and 40 deletions

4
ggml.c
View File

@ -16734,7 +16734,9 @@ size_t ggml_quantize_qx_0(const float * src, void * dst, int n, int64_t * hist,
// Store the quantization pivots / points
float qvals[1 << qbits];
// IMPORTANT: Change qvals's size depending on the maximum qbits expected
GGML_ASSERT(qbits <= 8);
float qvals[1 << 8];
for (int j = 0; j < (1 << qbits); j++) {
qvals[j] = min_value + (mult_range * j);

View File

@ -567,11 +567,13 @@ struct llama_file_loader {
if (shard.type == GGML_TYPE_QX_0) {
shard.extra_data_file_off = file.tell();
uint64_t extra_data[shard.ne[1]];
file.read_raw(extra_data, sizeof(uint64_t) * shard.ne[1]);
// seek until before the last element of extra_data
file.seek(sizeof(uint64_t) * (shard.ne[1] - 1), SEEK_CUR);
// set the size of the tensor here
shard.size = extra_data[shard.ne[1] - 1];
// get the tensor's size from here
uint64_t tensor_size = 0;
file.read_raw(&tensor_size, sizeof(uint64_t));
shard.size = tensor_size;
// realign, just in case extra_data isn't a multiple of 32B
file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
@ -1746,8 +1748,8 @@ static bool llama_eval_internal(
lctx.n_p_eval += N;
}
fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
fflush(stderr);
// fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
// fflush(stderr);
return true;
}