mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
cuda : get_row_rounding F32 (#4095)
* Fix #4017 * Update ggml-cuda.cu Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * Update ggml-cuda.cu Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> --------- Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
This commit is contained in:
parent
4f447a4833
commit
b83e149ec6
@ -6356,6 +6356,7 @@ static int64_t get_row_rounding(ggml_type type) {
|
||||
case GGML_TYPE_Q8_0:
|
||||
return max_compute_capability >= CC_RDNA2 ? 128 : 64;
|
||||
case GGML_TYPE_F16:
|
||||
case GGML_TYPE_F32:
|
||||
return 1;
|
||||
case GGML_TYPE_Q2_K:
|
||||
return max_compute_capability >= CC_RDNA2 ? 128 : 32;
|
||||
@ -6378,6 +6379,7 @@ static int64_t get_row_rounding(ggml_type type) {
|
||||
case GGML_TYPE_Q8_0:
|
||||
return 64;
|
||||
case GGML_TYPE_F16:
|
||||
case GGML_TYPE_F32:
|
||||
return 1;
|
||||
case GGML_TYPE_Q2_K:
|
||||
case GGML_TYPE_Q3_K:
|
||||
|
Loading…
Reference in New Issue
Block a user