From 87e25a1d1bd26eb06d1cab9e2ee4e14a7a0be33c Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 6 Jul 2024 09:22:16 +0200 Subject: [PATCH] llama : add early return for empty range (#8327) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * llama : add early return for empty range This commit adds an early return to the llama_kv_cache_seq_add and llama_kv_cache_seq_div functions. The motivation for adding this is to avoid looping over the cache when the range is empty. I ran into this when using the self-extend feature in main.cpp. Signed-off-by: Daniel Bevenius * llama : add static_cast to fix CI warning/error This commit attempts to fix the following warning/error: ```console src/llama.cpp:7271:31: error: comparison of integer expressions of different signedness: ‘int’ and ‘uint32_t’ {aka ‘unsigned int’} [-Werror=sign-compare] 7271 | if (i < hparams.n_layer_dense_lead) { | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``` This can be reproduced locally by setting -Wsign-compare in the Makefile. Signed-off-by: Daniel Bevenius * squash! llama : add early return for empty range Remove the setting of cache.head to 0 when the range is empty. Signed-off-by: Daniel Bevenius * Update src/llama.cpp --------- Signed-off-by: Daniel Bevenius Co-authored-by: Georgi Gerganov --- src/llama.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/llama.cpp b/src/llama.cpp index b770ca5bc..b39906fd5 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -3260,6 +3260,8 @@ static void llama_kv_cache_seq_add( if (p0 < 0) p0 = 0; if (p1 < 0) p1 = std::numeric_limits::max(); + // If there is no range then return early to avoid looping over the cache. + if (p0 == p1) return; if (cache.recurrent) { // for Mamba-like models, only the pos needs to be shifted @@ -3304,6 +3306,8 @@ static void llama_kv_cache_seq_div( int d) { if (p0 < 0) p0 = 0; if (p1 < 0) p1 = std::numeric_limits::max(); + // If there is no range then return early to avoid looping over the cache. + if (p0 == p1) return; if (cache.recurrent) { // for Mamba-like models, only the pos needs to be changed