llama : fix integer overflow during quantization (#6063)

This commit is contained in:
Georgi Gerganov 2024-03-14 22:58:41 +02:00 committed by GitHub
parent 6e0438da3c
commit 4755afd1cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -11977,7 +11977,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
return new_type;
}
-static int32_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
+static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
std::mutex mutex;
int counter = 0;
size_t new_size = 0;