diff --git a/convert.py b/convert.py
index adc5fdd26..f6237579d 100755
--- a/convert.py
+++ b/convert.py
@@ -812,6 +812,23 @@ def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyM
 def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
     tmap = gguf.get_tensor_name_map(ARCH, params.n_layer)
 
+    tmp = model
+
+    # HF models permut or pack some of the tensors, so we need to undo that
+    for i in itertools.count():
+        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
+            print(f"Permuting layer {i}")
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
+           #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] =              model[f"model.layers.{i}.self_attn.v_proj.weight"]
+        elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
+            print(f"Unpacking and permuting layer {i}")
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
+        else:
+            break
+
     out: LazyModel = {}
     for name, lazy_tensor in model.items():
         name_new = name
@@ -825,8 +842,9 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
         else:
             raise Exception(f"Unexpected tensor name: {name}")
 
-        if gguf.should_skip_tensor(ARCH, params.n_layer, name_new):
+        if gguf.should_skip_tensor_TMP(ARCH, params.n_layer, name_new):
             print(f"skipping tensor {name_new}")
+            continue
         else:
             print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type} | {lazy_tensor.shape}")
             out[name_new] = lazy_tensor
diff --git a/gguf.py b/gguf.py
index a4dd10872..2a6806a91 100644
--- a/gguf.py
+++ b/gguf.py
@@ -148,7 +148,11 @@ MODEL_TENSOR_SKIP = {
         ],
     }
 
-def should_skip_tensor(arch : MODEL_ARCH, n_blocks : int, name : str) -> bool:
+# TODO: the following helper functions should be removed
+#       instead, get_tensor_name_map should return tuples of (name, MODEL_TENSOR)
+#       however, my Python is very bad, and I couldn't figure out how to do this, hence these functions
+# REMOVE
+def should_skip_tensor_TMP(arch : MODEL_ARCH, n_blocks : int, name : str) -> bool:
     for skip in MODEL_TENSOR_SKIP.get(arch, []):
         for i in range(n_blocks):
             if name == MODEL_TENSOR_NAMES[arch][skip].format(bid=i):