mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-29 04:44:34 +00:00
convert-hf : allow converting the weird BitNet 1.3B
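Its FFN size is 5460, which is not a multiple of the Q1_3 block size, so tensors with that last dimension cannot be quantized to Q1_3. The offending tensors are kept in F16 instead, which makes the final model 5.01 bits per weight (bpw).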
This commit is contained in:
parent
961e293833
commit
0996149911
@ -301,12 +301,16 @@ class Model:
|
|||||||
if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
|
if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
|
||||||
# TODO: cleaner model-specific per-tensor types
|
# TODO: cleaner model-specific per-tensor types
|
||||||
# NOTE: Q1_3 is only relevant for BitNet 1.58b
|
# NOTE: Q1_3 is only relevant for BitNet 1.58b
|
||||||
if self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3 and not any(
|
if (
|
||||||
|
self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
|
||||||
|
and gguf.can_quantize_to_q1_3(data)
|
||||||
|
and not any(
|
||||||
self.match_model_tensor_name(new_name, key, None)
|
self.match_model_tensor_name(new_name, key, None)
|
||||||
for key in [
|
for key in [
|
||||||
gguf.MODEL_TENSOR.TOKEN_EMBD,
|
gguf.MODEL_TENSOR.TOKEN_EMBD,
|
||||||
gguf.MODEL_TENSOR.OUTPUT,
|
gguf.MODEL_TENSOR.OUTPUT,
|
||||||
]
|
]
|
||||||
|
)
|
||||||
):
|
):
|
||||||
data = gguf.quantize_q1_3(data)
|
data = gguf.quantize_q1_3(data)
|
||||||
assert data.dtype == np.uint8
|
assert data.dtype == np.uint8
|
||||||
|
@ -126,6 +126,10 @@ def quantize_q8_0(data: np.ndarray):
|
|||||||
__q1_3_block_size, __q1_3_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3]
|
__q1_3_block_size, __q1_3_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3]
|
||||||
|
|
||||||
|
|
||||||
|
def can_quantize_to_q1_3(n: np.ndarray) -> bool:
|
||||||
|
return n.shape[-1] % __q1_3_block_size == 0
|
||||||
|
|
||||||
|
|
||||||
def __quantize_q1_3_shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
|
def __quantize_q1_3_shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
|
||||||
return (*s[:-1], s[-1] // __q1_3_block_size * __q1_3_type_size)
|
return (*s[:-1], s[-1] // __q1_3_block_size * __q1_3_type_size)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user