mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-11 21:39:52 +00:00
ggml : fix Q4_3 quantization
The GGML_TYPE_Q4_3 case in ggml_quantize_chunk was lost during conflict resolution in the last PR; this commit restores it.
This commit is contained in:
parent
38de86a711
commit
66aab46079
6
ggml.c
6
ggml.c
@ -12210,6 +12210,12 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|||||||
block_q4_2 * block = (block_q4_2*)dst + start / QK4_2;
|
block_q4_2 * block = (block_q4_2*)dst + start / QK4_2;
|
||||||
result = ggml_quantize_q4_2(src + start, block, n, n, hist);
|
result = ggml_quantize_q4_2(src + start, block, n, n, hist);
|
||||||
} break;
|
} break;
|
||||||
|
case GGML_TYPE_Q4_3:
|
||||||
|
{
|
||||||
|
GGML_ASSERT(start % QK4_3 == 0);
|
||||||
|
block_q4_3 * block = (block_q4_3*)dst + start / QK4_3;
|
||||||
|
result = ggml_quantize_q4_3(src + start, block, n, n, hist);
|
||||||
|
} break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user