llama : fix quantization when tensors are missing (#5423)

2024-12-26 03:14:35 +00:00 · 2024-02-12 20:14:39 +02:00 · 2024-02-12 20:14:39 +02:00 · 099afc6274
commit 099afc6274
parent df334a1125
1 changed files with 24 additions and 8 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -772,22 +772,37 @@ struct LLM_TN {
    llm_arch arch;

    std::string operator()(llm_tensor tensor) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
        return LLM_TENSOR_NAMES[arch].at(tensor);
    }

    std::string operator()(llm_tensor tensor, const std::string & suffix) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
        return LLM_TENSOR_NAMES[arch].at(tensor) + "." + suffix;
    }

    std::string operator()(llm_tensor tensor, int bid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
        return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid);
    }

    std::string operator()(llm_tensor tensor, const std::string & suffix, int bid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
        return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid) + "." + suffix;
    }

    std::string operator()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
        return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid, xid) + "." + suffix;
    }
 };
@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
        }
        ++qs.i_ffn_up;
    }
+
    //    if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
    //}
    // IK: let's remove this, else Q2_K is almost the same as Q3_K_S