Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2025-01-12 03:31:46 +00:00
lazy fix for llama-bench (runs without pp_threads support)
commit 1c154e9ea5
parent a129a31457
@@ -853,7 +853,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_bat
     int n_processed = 0;
     while (n_processed < n_prompt) {
         int n_tokens = std::min(n_prompt - n_processed, n_batch);
-        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads);
+        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads, n_threads);
         n_processed += n_tokens;
     }
 }
@@ -861,7 +861,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_bat
 static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
     llama_token token = llama_token_bos();
     for (int i = 0; i < n_gen; i++) {
-        llama_eval(ctx, &token, 1, n_past + i, n_threads);
+        llama_eval(ctx, &token, 1, n_past + i, n_threads, n_threads);
     }
 }
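For context, below is a minimal, self-contained C++ sketch of the call pattern this commit adopts. It assumes (from the commit title and the diff) that the parent commit added a separate prompt-processing thread count as a trailing llama_eval parameter, here called pp_threads; since llama-bench has no pp_threads setting of its own, the "lazy fix" is to pass n_threads for both arguments. llama_eval is stubbed out and the pp_threads name is an assumption, not verbatim llama.cpp code.

    // Minimal sketch (not llama.cpp itself) of the post-change call pattern.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    using llama_token = int;
    struct llama_context {};

    // Stub with the assumed post-change signature: the final parameter is taken
    // to be the prompt-processing thread count added by the parent commit.
    static int llama_eval(llama_context * /*ctx*/, const llama_token * /*tokens*/,
                          int n_tokens, int n_past, int n_threads, int pp_threads) {
        std::printf("eval %d tokens at past=%d (n_threads=%d, pp_threads=%d)\n",
                    n_tokens, n_past, n_threads, pp_threads);
        return 0;
    }

    // Mirrors test_prompt(): feeds the prompt in batches and simply reuses
    // n_threads as the pp_threads argument (the "lazy" part of the fix).
    static void test_prompt(llama_context * ctx, int n_prompt, int n_past,
                            int n_batch, int n_threads) {
        std::vector<llama_token> tokens(n_batch, 0);
        int n_processed = 0;
        while (n_processed < n_prompt) {
            const int n_tokens = std::min(n_prompt - n_processed, n_batch);
            llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed,
                       n_threads, n_threads);
            n_processed += n_tokens;
        }
    }

    int main() {
        llama_context ctx;
        test_prompt(&ctx, 10, 0, 4, 8); // 10-token prompt, batch of 4, 8 threads
        return 0;
    }

A fuller fix would presumably expose pp_threads as its own benchmark parameter instead of reusing n_threads, which is what the commit title concedes by calling this a lazy fix.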