Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2025-01-10 18:51:45 +00:00
Removed trailing whitespace, removed variable-length arrays, removed debug print
This commit is contained in:
parent 124b4172ef
commit 1e06f12714
ggml.c (4 changes)
@@ -16734,7 +16734,9 @@ size_t ggml_quantize_qx_0(const float * src, void * dst, int n, int64_t * hist,
 
 
     // Store the quantization pivots / points
-    float qvals[1 << qbits];
+    // IMPORTANT: Change qvals's size depending on the maximum qbits expected
+    GGML_ASSERT(qbits <= 8);
+    float qvals[1 << 8];
 
     for (int j = 0; j < (1 << qbits); j++) {
         qvals[j] = min_value + (mult_range * j);
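The ggml.c change above drops a C variable-length array in favor of a buffer sized for the worst case (qbits == 8) plus a GGML_ASSERT that guards it. Below is a minimal standalone sketch of that pattern, assuming the quantization points are evenly spaced as in the hunk; the function name nearest_quant_point, the MAX_QBITS constant, and the snapping logic are illustrative, not code from ggml.

#include <cassert>
#include <cstdio>

constexpr int MAX_QBITS = 8;   // assumed worst case the fixed buffer must cover

static float nearest_quant_point(float x, float min_value, float mult_range, int qbits) {
    assert(qbits <= MAX_QBITS);        // guard before touching the fixed-size buffer
    float qvals[1 << MAX_QBITS];       // sized for the maximum, instead of a VLA of 1 << qbits
    const int n = 1 << qbits;
    for (int j = 0; j < n; j++) {
        qvals[j] = min_value + (mult_range * j);   // evenly spaced quantization points
    }
    // snap x onto the closest point
    int best = 0;
    for (int j = 1; j < n; j++) {
        const float dj = x - qvals[j];
        const float db = x - qvals[best];
        if (dj * dj < db * db) {
            best = j;
        }
    }
    return qvals[best];
}

int main() {
    // 3-bit grid over [0, 1): points at 0.000, 0.125, ..., 0.875
    printf("%f\n", nearest_quant_point(0.37f, 0.0f, 0.125f, 3));   // prints 0.375000
    return 0;
}

The trade-off is a fixed 256-float (1 KiB) stack buffer regardless of qbits, which is what the IMPORTANT comment in the hunk warns about.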
llama.cpp (14 changes)
@@ -567,11 +567,13 @@ struct llama_file_loader {
             if (shard.type == GGML_TYPE_QX_0) {
                 shard.extra_data_file_off = file.tell();
 
-                uint64_t extra_data[shard.ne[1]];
-                file.read_raw(extra_data, sizeof(uint64_t) * shard.ne[1]);
+                // seek until before the last element of extra_data
+                file.seek(sizeof(uint64_t) * (shard.ne[1] - 1), SEEK_CUR);
 
-                // set the size of the tensor here
-                shard.size = extra_data[shard.ne[1] - 1];
+                // get the tensor's size from here
+                uint64_t tensor_size = 0;
+                file.read_raw(&tensor_size, sizeof(uint64_t));
+                shard.size = tensor_size;
 
                 // realign, just in case extra_data isn't a multiple of 32B
                 file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
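Instead of reading the whole extra_data table into a variable-length array, the loader now seeks past the first ne[1] - 1 entries, reads only the trailing uint64_t (the tensor size), and then realigns the file offset to a 32-byte boundary. A rough sketch of the same pattern is below, assuming a plain binary file of little-endian uint64_t values whose last entry holds the size; read_last_u64 and the std::ifstream usage are illustrative stand-ins, not the actual llama_file API.

#include <cstdint>
#include <cstdio>
#include <fstream>

static uint64_t read_last_u64(std::ifstream & f, std::streamoff n_entries) {
    // skip the first n_entries - 1 values instead of buffering the whole table
    f.seekg((n_entries - 1) * (std::streamoff) sizeof(uint64_t), std::ios::cur);

    uint64_t last = 0;
    f.read(reinterpret_cast<char *>(&last), sizeof last);

    // realign to a 32-byte boundary, mirroring (-pos) & 31 in the diff
    std::streamoff pos = f.tellg();
    f.seekg((-pos) & 31, std::ios::cur);
    return last;
}

int main() {
    // write a small table of 4 values, the last one acting as the "size"
    {
        std::ofstream out("extra_data.bin", std::ios::binary);
        uint64_t table[4] = {10, 20, 30, 4096};
        out.write(reinterpret_cast<const char *>(table), sizeof table);
    }
    std::ifstream in("extra_data.bin", std::ios::binary);
    printf("tensor size: %llu\n", (unsigned long long) read_last_u64(in, 4));
    return 0;
}

Note that (-pos) & 31 is the number of bytes needed to pad pos up to the next multiple of 32, and is 0 when pos is already aligned.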
@@ -1746,8 +1748,8 @@ static bool llama_eval_internal(
         lctx.n_p_eval += N;
     }
 
-    fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
-    fflush(stderr);
+    // fprintf(stderr, "\nmodel eval time: %ldms\n", (ggml_time_us() - t_start_us) / 1000);
+    // fflush(stderr);
 
     return true;
 }
 
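The last hunk only comments out a per-eval debug print rather than deleting it. For reference, a self-contained sketch of the timing pattern it used is below: take a microsecond timestamp before the work and report the difference in milliseconds. std::chrono stands in for ggml's ggml_time_us() so the snippet builds on its own, and the busy loop is a placeholder for the model evaluation.

#include <chrono>
#include <cstdint>
#include <cstdio>

static int64_t time_us() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    const int64_t t_start_us = time_us();

    // stand-in for the model evaluation
    volatile double sink = 0.0;
    for (int i = 0; i < 1000000; i++) { sink = sink + i * 0.5; }

    fprintf(stderr, "\nmodel eval time: %lldms\n",
            (long long) ((time_us() - t_start_us) / 1000));
    fflush(stderr);
    return 0;
}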