From 2bfe9de6d3a3598d4b778f9b144bb8ac33c2797b Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Sun, 18 Aug 2024 22:43:39 -0400
Subject: [PATCH] llama : support running Mamba-Codestral-7B-v0.1

---
 convert_hf_to_gguf.py | 4 ++++
 src/llama.cpp         | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 0ac64574a..a5bdd5def 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2843,6 +2843,10 @@ class Mamba2Model(Model):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
+        if name.startswith("model.backbone") or name.startswith("model.lm_head"):
+            # map Mamba-Codestral-7B-v0.1 tensor names to the names used by Mamba-2
+            name = name.removeprefix("model.")
+
         if name.endswith(".dt_bias"):
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
 
diff --git a/src/llama.cpp b/src/llama.cpp
index 5be0ef7a2..fd80361bd 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9383,7 +9383,7 @@ static struct ggml_tensor * llm_build_mamba2(
         // grouped RMS norm
         y = ggml_reshape_4d(ctx, y, d_inner / n_group, n_group, n_seq_tokens, n_seqs);
         y = llm_build_norm(ctx, y, hparams,
-                model.layers[il].ssm_norm, NULL,
+                ggml_reshape_2d(ctx, model.layers[il].ssm_norm, d_inner / n_group, n_group), NULL,
                 LLM_NORM_RMS, cb, il);
         y = ggml_reshape_3d(ctx, y, d_inner, n_seq_tokens, n_seqs);
 
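For reference, here is a standalone sketch of what the convert_hf_to_gguf.py hunk does to tensor names. The remap_name helper and the sample tensor names are illustrative only, not part of the patch:

    # Sketch of the name remapping in Mamba2Model.modify_tensors (Python 3.9+
    # for str.removeprefix). Sample names below are illustrative, not taken
    # from an actual checkpoint.
    def remap_name(name: str) -> str:
        # Mamba-Codestral-7B-v0.1 nests its tensors under a "model." prefix;
        # strip it so the names match what the Mamba-2 conversion expects.
        if name.startswith("model.backbone") or name.startswith("model.lm_head"):
            name = name.removeprefix("model.")

        # Then the pre-existing rename of ".dt_bias" to ".dt_proj.bias" applies.
        if name.endswith(".dt_bias"):
            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
        return name

    assert remap_name("model.backbone.layers.0.mixer.dt_bias") == "backbone.layers.0.mixer.dt_proj.bias"
    assert remap_name("model.lm_head.weight") == "lm_head.weight"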
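Likewise, a rough NumPy analogue of the grouped RMS norm that the src/llama.cpp hunk adjusts. The grouped_rms_norm name, the (n_tokens, d_inner) layout, and the eps value are assumptions for illustration, not the actual ggml implementation:

    import numpy as np

    def grouped_rms_norm(y: np.ndarray, weight: np.ndarray, n_group: int, eps: float = 1e-5) -> np.ndarray:
        # y: (n_tokens, d_inner) activations; weight: (d_inner,) learned scale.
        n_tokens, d_inner = y.shape
        # Split the inner dimension into groups, mirroring ggml_reshape_4d.
        yg = y.reshape(n_tokens, n_group, d_inner // n_group)
        # RMS-normalize each group independently.
        rms = np.sqrt(np.mean(yg * yg, axis=-1, keepdims=True) + eps)
        # Reshape the 1D weight to match the grouped layout, mirroring the
        # ggml_reshape_2d added by the patch, so the multiply broadcasts
        # per group instead of mismatching the normalized rows.
        wg = weight.reshape(n_group, d_inner // n_group)
        return ((yg / rms) * wg).reshape(n_tokens, d_inner)

    out = grouped_rms_norm(np.random.randn(3, 8), np.ones(8), n_group=2)

The point of the one-line llama.cpp change: the activations are already reshaped so that each group is normalized on its own, so the 1D ssm_norm weight has to be viewed as (d_inner / n_group, n_group) for the element-wise scale to line up with the per-group rows.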