mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 12:10:18 +00:00
k-quants : fix quantization ranges (#3646)
This commit is contained in:
parent
940efa95fe
commit
281ef73c25
30
k_quants.c
30
k_quants.c
@@ -462,12 +462,9 @@ void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
size_t ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
||||||
const int nb = k / QK_K;
|
(void)hist; // TODO: collect histograms
|
||||||
|
|
||||||
// TODO - collect histograms - although, at a second thought, I don't really care about them
|
for (int j = 0; j < n; j += k) {
|
||||||
(void)hist;
|
|
||||||
|
|
||||||
for (int j = 0; j < nb; j += k) {
|
|
||||||
block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
|
block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
|
||||||
quantize_row_q2_K_reference(src + j, y, k);
|
quantize_row_q2_K_reference(src + j, y, k);
|
||||||
}
|
}
|
||||||
@@ -678,12 +675,9 @@ void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
size_t ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
||||||
const int nb = k / QK_K;
|
(void)hist; // TODO: collect histograms
|
||||||
|
|
||||||
// TODO - collect histograms - although, at a second thought, I don't really care about them
|
for (int j = 0; j < n; j += k) {
|
||||||
(void)hist;
|
|
||||||
|
|
||||||
for (int j = 0; j < nb; j += k) {
|
|
||||||
block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
|
block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
|
||||||
quantize_row_q3_K_reference(src + j, y, k);
|
quantize_row_q3_K_reference(src + j, y, k);
|
||||||
}
|
}
|
||||||
@@ -846,9 +840,9 @@ void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
|
|||||||
|
|
||||||
size_t ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
size_t ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
||||||
assert(k % QK_K == 0);
|
assert(k % QK_K == 0);
|
||||||
const int nb = k / QK_K;
|
|
||||||
(void)hist; // TODO: collect histograms
|
(void)hist; // TODO: collect histograms
|
||||||
for (int j = 0; j < nb; j += k) {
|
|
||||||
|
for (int j = 0; j < n; j += k) {
|
||||||
block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
|
block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
|
||||||
quantize_row_q4_K_reference(src + j, y, k);
|
quantize_row_q4_K_reference(src + j, y, k);
|
||||||
}
|
}
|
||||||
@@ -1052,9 +1046,9 @@ void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
|
|||||||
|
|
||||||
size_t ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
size_t ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
||||||
assert(k % QK_K == 0);
|
assert(k % QK_K == 0);
|
||||||
const int nb = k / QK_K;
|
(void)hist; // TODO: collect histograms
|
||||||
(void)hist;
|
|
||||||
for (int j = 0; j < nb; j += k) {
|
for (int j = 0; j < n; j += k) {
|
||||||
block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
|
block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
|
||||||
quantize_row_q5_K_reference(src + j, y, k);
|
quantize_row_q5_K_reference(src + j, y, k);
|
||||||
}
|
}
|
||||||
@@ -1200,11 +1194,9 @@ void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
|
|||||||
|
|
||||||
size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
|
size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
|
||||||
assert(k % QK_K == 0);
|
assert(k % QK_K == 0);
|
||||||
const int nb = k / QK_K;
|
(void)hist; // TODO: collect histograms
|
||||||
|
|
||||||
(void)hist; // TODO
|
for (int j = 0; j < n; j += k) {
|
||||||
|
|
||||||
for (int j = 0; j < nb; j += k) {
|
|
||||||
block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
|
block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
|
||||||
quantize_row_q6_K_reference(src + j, y, k);
|
quantize_row_q6_K_reference(src + j, y, k);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user