From ad80e5a4a7a99908dfb38ed025c2a4cba4d3f839 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 4 Sep 2023 19:46:52 +0300
Subject: [PATCH] llama : add ggml_cont to trigger bug with Metal

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index c97c1462f..097de7221 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2418,11 +2418,11 @@ static struct ggml_cgraph * llm_build_llama(
 
             // split cached V into n_head heads
             struct ggml_tensor * V =
-                ggml_view_3d(ctx0, kv_self.v,
+                ggml_cont(ctx0, ggml_view_3d(ctx0, kv_self.v,
                         n_past + N, n_embd_head, n_head_kv,
                         ggml_element_size(kv_self.v)*n_ctx,
                         ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
-                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il));
             offload_func_v(V);
             ggml_set_name(V, "V");
 