Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-10 18:51:45 +00:00)
Removed trailing whitespace, removed variable-length arrays, removed debug print

commit 1e06f12714
parent 124b4172ef
ggml.c (4 changes: 3 additions, 1 deletion)
@@ -16734,7 +16734,9 @@ size_t ggml_quantize_qx_0(const float * src, void * dst, int n, int64_t * hist,
 
     // Store the quantization pivots / points
-    float qvals[1 << qbits];
+    // IMPORTANT: Change qvals's size depending on the maximum qbits expected
+    GGML_ASSERT(qbits <= 8);
+    float qvals[1 << 8];
 
     for (int j = 0; j < (1 << qbits); j++) {
         qvals[j] = min_value + (mult_range * j);
     }
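The ggml.c hunk swaps a C99 variable-length array for a fixed worst-case buffer: VLAs are not standard C++, are rejected by MSVC, and make stack usage depend on runtime input, so the commit sizes qvals for the maximum supported qbits and asserts that bound. Below is a minimal standalone sketch of the same pattern; build_pivots and MAX_QBITS are illustrative names, not identifiers from the commit.

#include <cassert>
#include <cstdio>

// Worst case the code supports; mirrors GGML_ASSERT(qbits <= 8) in the diff.
constexpr int MAX_QBITS = 8;

// Fill the first (1 << qbits) quantization pivots, evenly spaced from
// min_value in steps of mult_range. qvals must hold 1 << MAX_QBITS floats.
static void build_pivots(float * qvals, int qbits, float min_value, float mult_range) {
    assert(qbits <= MAX_QBITS);            // invariant that replaces the VLA bound
    for (int j = 0; j < (1 << qbits); j++) {
        qvals[j] = min_value + mult_range * j;
    }
}

int main() {
    float qvals[1 << MAX_QBITS];           // fixed size: 256 floats, worst case
    build_pivots(qvals, 4, -1.0f, 0.125f); // 16 pivots: -1.0, -0.875, ...
    std::printf("%.3f %.3f\n", qvals[0], qvals[15]);
    return 0;
}

If qbits ever needs to exceed 8, the buffer size and the assertion have to change together, which is what the IMPORTANT comment in the diff warns about.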
llama.cpp (14 changes: 8 additions, 6 deletions)
@@ -567,11 +567,13 @@ struct llama_file_loader {
         if (shard.type == GGML_TYPE_QX_0) {
             shard.extra_data_file_off = file.tell();
 
-            uint64_t extra_data[shard.ne[1]];
-            file.read_raw(extra_data, sizeof(uint64_t) * shard.ne[1]);
+            // seek until before the last element of extra_data
+            file.seek(sizeof(uint64_t) * (shard.ne[1] - 1), SEEK_CUR);
 
-            // set the size of the tensor here
-            shard.size = extra_data[shard.ne[1] - 1];
+            // get the tensor's size from here
+            uint64_t tensor_size = 0;
+            file.read_raw(&tensor_size, sizeof(uint64_t));
+            shard.size = tensor_size;
 
             // realign, just in case extra_data isn't a multiple of 32B
             file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
@@ -1746,8 +1748,8 @@ static bool llama_eval_internal(
         lctx.n_p_eval += N;
     }
 
-    fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
-    fflush(stderr);
+    // fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
+    // fflush(stderr);
     return true;
 }
 
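The first llama.cpp hunk applies the same VLA cleanup to file loading: instead of reading all shard.ne[1] entries of the extra-data table into a stack array just to use the last one, it seeks past the first shard.ne[1] - 1 entries and reads the final uint64_t directly, then pads the file offset forward to the next 32-byte boundary ((-pos) & 31 yields the padding byte count, 0 when pos is already aligned). The second hunk simply comments out the per-eval timing print. Below is a minimal sketch of the seek-and-read pattern using C stdio; load_tensor_size, the table layout, and the demo file are assumptions for illustration, not the llama_file API from the commit.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Sketch: the extra-data table holds n_rows uint64_t values and the tensor
// size sits in the last one. Instead of reading the whole table into a
// variable-length array, skip n_rows - 1 entries and read a single value.
static uint64_t load_tensor_size(std::FILE * f, uint64_t n_rows) {
    std::fseek(f, (long)(sizeof(uint64_t) * (n_rows - 1)), SEEK_CUR);
    uint64_t tensor_size = 0;
    if (std::fread(&tensor_size, sizeof tensor_size, 1, f) != 1) {
        return 0; // read failed; a real loader would report the error
    }

    // Realign to a 32-byte boundary: for the current offset pos,
    // (-pos) & 31 is the number of padding bytes up to the next
    // multiple of 32 (0 if pos is already aligned).
    long pos = std::ftell(f);
    std::fseek(f, (long)(-(std::ptrdiff_t)pos & 31), SEEK_CUR);
    return tensor_size;
}

int main() {
    // Write a tiny demo file: a 3-entry table whose last entry is 4096.
    std::FILE * f = std::fopen("extra_data_demo.bin", "wb+");
    const uint64_t table[3] = { 1, 2, 4096 };
    std::fwrite(table, sizeof(uint64_t), 3, f);
    std::rewind(f);

    std::printf("tensor size: %llu\n", (unsigned long long)load_tensor_size(f, 3));
    std::fclose(f);
    return 0;
}

For example, at offset 24 the realignment seeks forward by (-24) & 31 = 8 bytes, landing on offset 32; at offset 32 it seeks by 0.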