convert : use f32 outtype for bf16 tensors (#6106)

The old behaviour was to default to f16 for bf16 tensors, but bf16 to f16 is not a lossless conversion. Change the default outtype for bf16 tensors to f32, which is a lossless conversion.
2024-12-25 02:44:36 +00:00 · 2024-03-18 09:04:41 +01:00 · 2024-03-18 09:04:41 +01:00 · 3a6efdd03c
commit 3a6efdd03c
parent d01b3c4c32
1 changed files with 2 additions and 2 deletions
--- a/convert.py
+++ b/convert.py
@@ -1167,9 +1167,9 @@ class OutputFile:
 def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
    wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight"].data_type

-    if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
+    if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)):
        return GGMLFileType.AllF32
-    if output_type_str == "f16" or (output_type_str is None and wq_type in (DT_F16, DT_BF16)):
+    if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16):
        return GGMLFileType.MostlyF16
    if output_type_str == "q8_0":
        return GGMLFileType.MostlyQ8_0