mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
RWKV v6: Add time_mix_decay_w1/w2 in quant exclusion list (#9387)
Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
parent
fb3f249815
commit
0b4ac75772
@ -302,6 +302,8 @@ class Model:
|
||||
gguf.MODEL_TENSOR.TIME_MIX_FIRST,
|
||||
gguf.MODEL_TENSOR.TIME_MIX_W1,
|
||||
gguf.MODEL_TENSOR.TIME_MIX_W2,
|
||||
gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
|
||||
gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
|
||||
)
|
||||
)
|
||||
or not new_name.endswith(".weight")
|
||||
|
@ -17530,6 +17530,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||
quantize &= name.find("time_mix_first.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_w1.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_w2.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
|
||||
quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
|
||||
|
||||
// do not quantize relative position bias (T5)
|
||||
quantize &= name.find("attn_rel_b.weight") == std::string::npos;
|
||||
|
Loading…
Reference in New Issue
Block a user