RWKV v6: Add time_mix_decay_w1/w2 in quant exclusion list (#9387)

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
Molly Sophia 2024-09-10 15:02:30 +08:00 committed by GitHub
parent fb3f249815
commit 0b4ac75772
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 0 deletions

View File

@@ -302,6 +302,8 @@ class Model:
gguf.MODEL_TENSOR.TIME_MIX_FIRST,
gguf.MODEL_TENSOR.TIME_MIX_W1,
gguf.MODEL_TENSOR.TIME_MIX_W2,
gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
)
)
or not new_name.endswith(".weight")

View File

@@ -17530,6 +17530,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
quantize &= name.find("time_mix_first.weight") == std::string::npos;
quantize &= name.find("time_mix_w1.weight") == std::string::npos;
quantize &= name.find("time_mix_w2.weight") == std::string::npos;
quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
// do not quantize relative position bias (T5)
quantize &= name.find("attn_rel_b.weight") == std::string::npos;