From 8ad7cd49fb0979aeafa39e42ce5f335a4ed53e35 Mon Sep 17 00:00:00 2001
From: klosax <131523366+klosax@users.noreply.github.com>
Date: Sat, 29 Jul 2023 16:47:00 +0200
Subject: [PATCH] Update convert-llama-h5-to-gguf.py

---
 convert-llama-h5-to-gguf.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py
index 439a6da30..d36e6da9a 100644
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -120,7 +120,7 @@ if Path(dir_model + "/tokenizer.model").is_file():
             text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
         score: float = tokenizer.get_score(i)
 
-        tokens.append(str(text))
+        tokens.append(text)
         scores.append(score)
 
 print("write gguf tokens")
@@ -184,6 +184,22 @@ for name in list_vars.keys():
                 name = "layers." + str(i) + ".feed_forward.w3.weight"
                 break
 
+    n_dims = len(data.shape)
+
+    # ftype == 0 -> float32, ftype == 1 -> float16 (only 2-D ".weight" tensors; all others stay float32)
+    ftype_cur = 0
+    if ftype != 0:
+        if name.endswith(".weight") and n_dims == 2:
+            data = data.astype(np.float16)
+            ftype_cur = 1
+        else:
+            data = data.astype(np.float32)
+            ftype_cur = 0
+    else:
+        if data.dtype != np.float32:
+            data = data.astype(np.float32)
+            ftype_cur = 0
+
     gguf_writer.write_tensor_info(name, data)