mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 10:54:36 +00:00
feat(convert_hf_to_gguf): support q4_0 and q4_1 quantization
This commit is contained in:
parent
c8c07d658a
commit
a279f17815
@ -357,6 +357,10 @@ class Model:
|
|||||||
data_qtype = gguf.GGMLQuantizationType.TQ1_0
|
data_qtype = gguf.GGMLQuantizationType.TQ1_0
|
||||||
elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0:
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0:
|
||||||
data_qtype = gguf.GGMLQuantizationType.TQ2_0
|
data_qtype = gguf.GGMLQuantizationType.TQ2_0
|
||||||
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_0:
|
||||||
|
data_qtype = gguf.GGMLQuantizationType.Q4_0
|
||||||
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_1:
|
||||||
|
data_qtype = gguf.GGMLQuantizationType.Q4_1
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown file type: {self.ftype.name}")
|
raise ValueError(f"Unknown file type: {self.ftype.name}")
|
||||||
|
|
||||||
@ -4293,8 +4297,8 @@ def parse_args() -> argparse.Namespace:
|
|||||||
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
|
"--outtype", type=str, choices=["f32", "f16", "bf16", "q4_0", "q4_1", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
|
||||||
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
|
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q4_0, q4_1 , q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--bigendian", action="store_true",
|
"--bigendian", action="store_true",
|
||||||
@ -4380,6 +4384,8 @@ def main() -> None:
|
|||||||
"f32": gguf.LlamaFileType.ALL_F32,
|
"f32": gguf.LlamaFileType.ALL_F32,
|
||||||
"f16": gguf.LlamaFileType.MOSTLY_F16,
|
"f16": gguf.LlamaFileType.MOSTLY_F16,
|
||||||
"bf16": gguf.LlamaFileType.MOSTLY_BF16,
|
"bf16": gguf.LlamaFileType.MOSTLY_BF16,
|
||||||
|
"q4_0": gguf.LlamaFileType.MOSTLY_Q4_0,
|
||||||
|
"q4_1": gguf.LlamaFileType.MOSTLY_Q4_1,
|
||||||
"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
|
"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
|
||||||
"tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0,
|
"tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0,
|
||||||
"tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,
|
"tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,
|
||||||
|
Loading…
Reference in New Issue
Block a user