diff --git a/convert-gptneox-h5-to-gguf.py b/convert-gptneox-h5-to-gguf.py
index 4f9d41487..066ed0da0 100644
--- a/convert-gptneox-h5-to-gguf.py
+++ b/convert-gptneox-h5-to-gguf.py
@@ -1,14 +1,36 @@
 # Quick and dirty HF gptneox--> gguf conversion
 
 import gguf
+import os
 import sys
 import struct
 import json
 import numpy as np
 from typing import Any, List
 from pathlib import Path
-from transformers import AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
+# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
+def bytes_to_unicode():
+    """
+    Returns list of utf-8 byte and a corresponding list of unicode strings.
+    The reversible bpe codes work on unicode strings.
+    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
+    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
+    This is a significant percentage of your normal, say, 32K bpe vocab.
+    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
+    And avoids mapping to whitespace/control characters the bpe code barfs on.
+    """
+    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
+    cs = bs[:]
+    n = 0
+    for b in range(2**8):
+        if b not in bs:
+            bs.append(b)
+            cs.append(2**8+n)
+            n += 1
+    cs = [chr(n) for n in cs]
+    return dict(zip(bs, cs))
 
 if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
@@ -20,7 +42,7 @@ if len(sys.argv) < 3:
 # output in the same directory as the model
 dir_model = sys.argv[1]
 fname_out = sys.argv[1] + "/ggml-model.bin"
-
+last_dir = os.path.basename(os.path.normpath(dir_model))
 
 # possible tensor data types
 #   ftype == 0 -> float32
@@ -37,6 +59,8 @@ if len(sys.argv) > 2:
         sys.exit(1)
     fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
 
+print("gguf: loading model "+last_dir)
+
 with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
@@ -44,17 +68,17 @@ if hparams["architectures"][0] != "GPTNeoXForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0] )
     sys.exit()
 
+
 model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True, trust_remote_code=True)
 list_vars = model.state_dict()
 
 gguf_writer = gguf.GGUFWriter.open(fname_out)
 
-
-print("gguf: add key-values, metadata")
+print("gguf: add metadata")
 
 llm_arch = "gptneox"
 
-gguf_writer.add_name("pythia-70b-deduped")
+gguf_writer.add_name(last_dir)
 gguf_writer.add_description("gguf test model")
 gguf_writer.add_architecture(llm_arch)
 gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
@@ -68,28 +92,55 @@ gguf_writer.add_layer_norm_eps(llm_arch, hparams["layer_norm_eps"])
 
 # TOKENIZATION
 
-print("gguf: add key-values, tokenizer")
+print("gguf: add tokenizer")
 
 tokens: List[str] = []
 merges: List[str] = []
 
+
 if Path(dir_model + "/tokenizer.json").is_file():
-    # vocab type gpt2
-    print("gguf: adding gpt2 tokenizer vocab")
+    # gpt2 tokenizer
+    gguf_writer.add_tokenizer_model("gpt2")
+
+    print("gguf: adding gpt2 tokenizer merges")
 
     with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
-        tokenizer = json.load(f)
+        tokenizer_json = json.load(f)
+    merges = tokenizer_json["model"]["merges"]
 
-    for key in tokenizer["model"]["vocab"]:
-        tokens.append(key)
-
-    merges = tokenizer["model"]["merges"]
-
-    gguf_writer.add_tokenizer_model("gpt2")
-    gguf_writer.add_token_list(tokens)
     gguf_writer.add_token_merges(merges)
 
-    if "added_tokens" in tokenizer and Path(dir_model + "/tokenizer_config.json").is_file():
+    print("gguf: adding gpt2 tokenizer vocab")
+
+    vocab_size = len( tokenizer_json["model"]["vocab"] )
+
+    # from ggllm.cpp falcon_convert.py
+    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+
+    reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
+    byte_encoder = bytes_to_unicode()
+    byte_decoder = {v:k for k, v in byte_encoder.items()}
+
+    for i in range(vocab_size):
+        if i in reverse_vocab:
+            try:
+                text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
+            except KeyError:
+                text = bytearray()
+                for c in reverse_vocab[i]:
+                    if ord(c) < 256:  # single byte character
+                        text.append(byte_decoder[ord(c)])
+                    else:  # multibyte special token character
+                        text.extend(c.encode('utf-8'))
+        else:
+            print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
+            padding_token = f"[PAD{i}]".encode("utf8")
+            text = bytearray(padding_token)
+        tokens.append(text)
+
+    gguf_writer.add_token_list(tokens)
+
+    if "added_tokens" in tokenizer_json and Path(dir_model + "/tokenizer_config.json").is_file():
         print("gguf: adding special token ids")
 
         with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
@@ -98,27 +149,27 @@ if Path(dir_model + "/tokenizer.json").is_file():
         # find special token ids
 
         if "bos_token" in tokenizer_config:
-            for key in tokenizer["added_tokens"]:
+            for key in tokenizer_json["added_tokens"]:
                 if key["content"] == tokenizer_config["bos_token"]:
                     gguf_writer.add_bos_token_id(key["id"])
 
         if "eos_token" in tokenizer_config:
-            for key in tokenizer["added_tokens"]:
+            for key in tokenizer_json["added_tokens"]:
                 if key["content"] == tokenizer_config["eos_token"]:
                     gguf_writer.add_eos_token_id(key["id"])
 
         if "unk_token" in tokenizer_config:
-            for key in tokenizer["added_tokens"]:
+            for key in tokenizer_json["added_tokens"]:
                 if key["content"] == tokenizer_config["unk_token"]:
                     gguf_writer.add_unk_token_id(key["id"])
 
         if "sep_token" in tokenizer_config:
-            for key in tokenizer["added_tokens"]:
+            for key in tokenizer_json["added_tokens"]:
                 if key["content"] == tokenizer_config["sep_token"]:
                     gguf_writer.add_sep_token_id(key["id"])
 
         if "pad_token" in tokenizer_config:
-            for key in tokenizer["added_tokens"]:
+            for key in tokenizer_json["added_tokens"]:
                 if key["content"] == tokenizer_config["pad_token"]:
                     gguf_writer.add_pad_token_id(key["id"])
 
@@ -165,11 +216,9 @@ print("gguf: write tensor data")
 
 for name in list_vars.keys():
     data = list_vars[name].squeeze().numpy()
-#    print("Process tensor: " + name + " with shape: ", data.shape)
 
     # we don't need these
     if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
-#        print("  Skip tensor: " + name)
         continue
 
     n_dims = len(data.shape)
@@ -178,16 +227,13 @@ for name in list_vars.keys():
     ftype_cur = 0
     if ftype != 0:
         if name.endswith(".weight") and n_dims == 2:
-#            print("  Converting to float16")
             data = data.astype(np.float16)
             ftype_cur = 1
         else:
-#            print("  Converting to float32")
             data = data.astype(np.float32)
             ftype_cur = 0
     else:
         if data.dtype != np.float32:
-#            print("  Converting to float32")
             data = data.astype(np.float32)
             ftype_cur = 0