llama : fix quantization when tensors are missing (#5423)

This commit is contained in:
Georgi Gerganov 2024-02-12 20:14:39 +02:00
parent df334a1125
commit 099afc6274
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@ -772,22 +772,37 @@ struct LLM_TN {
llm_arch arch; llm_arch arch;
// Returns the name registered for `tensor` under the current `arch`, or the
// sentinel "__missing__" when the architecture's table has no entry for it.
std::string operator()(llm_tensor tensor) const {
    // Resolve the per-arch table once and search it a single time, instead of
    // a find() followed by a second lookup of the same key through at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return it->second;
}
// Returns "<name>.<suffix>" for `tensor` under the current `arch`, or the bare
// sentinel "__missing__" (without the suffix) when the architecture's table
// has no entry for the tensor.
std::string operator()(llm_tensor tensor, const std::string & suffix) const {
    // Resolve the per-arch table once and search it a single time, instead of
    // a find() followed by a second lookup of the same key through at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return it->second + "." + suffix;
}
// Returns the name registered for `tensor` under the current `arch`, passed
// through ::format together with `bid`, or the sentinel "__missing__" when the
// architecture's table has no entry for the tensor.
// NOTE(review): the stored name is presumably a printf-style pattern with one
// integer placeholder for `bid` - confirm against the name table.
std::string operator()(llm_tensor tensor, int bid) const {
    // Resolve the per-arch table once and search it a single time, instead of
    // a find() followed by a second lookup of the same key through at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid);
}
// Returns the name registered for `tensor` under the current `arch`, passed
// through ::format together with `bid` and followed by "." and `suffix`. When
// the architecture's table has no entry for the tensor, the bare sentinel
// "__missing__" is returned instead (no suffix appended).
// NOTE(review): the stored name is presumably a printf-style pattern with one
// integer placeholder for `bid` - confirm against the name table.
std::string operator()(llm_tensor tensor, const std::string & suffix, int bid) const {
    // Resolve the per-arch table once and search it a single time, instead of
    // a find() followed by a second lookup of the same key through at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid) + "." + suffix;
}
// Returns the name registered for `tensor` under the current `arch`, passed
// through ::format together with `bid` and `xid` and followed by "." and
// `suffix`. When the architecture's table has no entry for the tensor, the
// bare sentinel "__missing__" is returned instead (no suffix appended).
// NOTE(review): the stored name is presumably a printf-style pattern with two
// integer placeholders (`bid`, then `xid`) - confirm against the name table.
std::string operator()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const {
    // Resolve the per-arch table once and search it a single time, instead of
    // a find() followed by a second lookup of the same key through at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid, xid) + "." + suffix;
}
}; };
@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
} }
++qs.i_ffn_up; ++qs.i_ffn_up;
} }
// if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K; // if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
//} //}
// IK: let's remove this, else Q2_K is almost the same as Q3_K_S // IK: let's remove this, else Q2_K is almost the same as Q3_K_S