From 1c154e9ea5a7cfe65fb1ce6908501b957fba9926 Mon Sep 17 00:00:00 2001
From: netrunnereve <139727413+netrunnereve@users.noreply.github.com>
Date: Fri, 18 Aug 2023 17:49:04 -0400
Subject: [PATCH] lazy fix for llama-bench (runs without pp_threads support)

---
 examples/llama-bench/llama-bench.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 266c8eab3..d69dcaf1a 100755
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -853,7 +853,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_bat
     int n_processed = 0;
     while (n_processed < n_prompt) {
         int n_tokens = std::min(n_prompt - n_processed, n_batch);
-        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads);
+        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads, n_threads);
         n_processed += n_tokens;
     }
 }
@@ -861,7 +861,7 @@
 static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
     llama_token token = llama_token_bos();
     for (int i = 0; i < n_gen; i++) {
-        llama_eval(ctx, &token, 1, n_past + i, n_threads);
+        llama_eval(ctx, &token, 1, n_past + i, n_threads, n_threads);
     }
 }
 
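
This patch assumes the pp_threads branch, where llama_eval() takes a separate
thread count for prompt processing; passing n_threads for both arguments keeps
llama-bench building and behaving as before, without exposing pp_threads as a
benchmark parameter. A minimal sketch of the assumed declaration follows (the
parameter name pp_threads is inferred from the subject line; upstream llama.h
at this point only has the five-argument form):

    // Assumed llama.h declaration on the pp_threads branch (not upstream):
    // n_threads is the thread count for generation-style evals, pp_threads
    // the thread count for batched prompt processing. This patch passes
    // n_threads for both, reproducing the old single-thread-count behavior.
    LLAMA_API int llama_eval(
            struct llama_context * ctx,
               const llama_token * tokens,
                             int   n_tokens,
                             int   n_past,
                             int   n_threads,
                             int   pp_threads);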