lazy fix for llama-bench (runs without pp_threads support)

netrunnereve 2023-08-18 17:49:04 -04:00
parent a129a31457
commit 1c154e9ea5


@@ -853,7 +853,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
     int n_processed = 0;
     while (n_processed < n_prompt) {
         int n_tokens = std::min(n_prompt - n_processed, n_batch);
-        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads);
+        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads, n_threads);
         n_processed += n_tokens;
     }
 }
@@ -861,7 +861,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
 
 static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
     llama_token token = llama_token_bos();
     for (int i = 0; i < n_gen; i++) {
-        llama_eval(ctx, &token, 1, n_past + i, n_threads);
+        llama_eval(ctx, &token, 1, n_past + i, n_threads, n_threads);
     }
 }
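
Note on the change: this fork's pp_threads patch extends llama_eval with a second thread-count argument so that prompt processing and token generation can each use their own number of threads. llama-bench does not yet expose a separate pp_threads setting, so this commit simply passes n_threads twice, which keeps the tool building and behaving as before. A minimal sketch of the assumed declaration (the pp_threads name and its position are assumptions based on this fork's patch; upstream llama_eval at the time took a single n_threads):

    // Hypothetical post-patch declaration; pp_threads name/position assumed,
    // not upstream API. Upstream llama.cpp here declared only one thread count.
    LLAMA_API int llama_eval(
            struct llama_context * ctx,
            const llama_token    * tokens,      // tokens to evaluate
            int                    n_tokens,    // batch size
            int                    n_past,      // tokens already in the KV cache
            int                    n_threads,   // threads for generation
            int                    pp_threads); // threads for prompt processing

Since both arguments receive the same value in this commit, their order does not matter here; a follow-up could add a dedicated pp_threads option to llama-bench and benchmark the two thread counts independently.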