cuda : get_row_rounding F32 (#4095)

* Fix #4017

* Update ggml-cuda.cu

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Update ggml-cuda.cu

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

---------

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
2024-12-26 11:24:35 +00:00 · 2023-11-17 00:01:15 -08:00 · 2023-11-17 00:01:15 -08:00 · b83e149ec6
commit b83e149ec6
parent 4f447a4833
1 changed file with 2 additions and 0 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6356,6 +6356,7 @@ static int64_t get_row_rounding(ggml_type type) {
        case GGML_TYPE_Q8_0:
            return max_compute_capability >= CC_RDNA2 ? 128 : 64;
        case GGML_TYPE_F16:
        case GGML_TYPE_F32:
            return 1;
        case GGML_TYPE_Q2_K:
            return max_compute_capability >= CC_RDNA2 ? 128 : 32;
@@ -6378,6 +6379,7 @@ static int64_t get_row_rounding(ggml_type type) {
        case GGML_TYPE_Q8_0:
            return 64;
        case GGML_TYPE_F16:
        case GGML_TYPE_F32:
            return 1;
        case GGML_TYPE_Q2_K:
        case GGML_TYPE_Q3_K: