From ad80e5a4a7a99908dfb38ed025c2a4cba4d3f839 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 4 Sep 2023 19:46:52 +0300
Subject: [PATCH] llama : add ggml_cont to trigger bug with Metal

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index c97c1462f..097de7221 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2418,11 +2418,11 @@ static struct ggml_cgraph * llm_build_llama(
 
             // split cached V into n_head heads
             struct ggml_tensor * V =
-                ggml_view_3d(ctx0, kv_self.v,
+                ggml_cont(ctx0, ggml_view_3d(ctx0, kv_self.v,
                         n_past + N, n_embd_head, n_head_kv,
                         ggml_element_size(kv_self.v)*n_ctx,
                         ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
-                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il));
             offload_func_v(V);
             ggml_set_name(V, "V");