llama-bench : use two tokens in the warmup run for prompt evals (#3059)

This commit is contained in:
slaren 2023-09-07 15:52:34 +02:00 committed by GitHub
parent be8c9c245b
commit 15b67a66c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -986,7 +986,12 @@ int main(int argc, char ** argv) {
            test t(inst, lmodel, ctx);
            // warmup run
            if (t.n_prompt > 0) {
                test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads);
            }
            if (t.n_gen > 0) {
                test_gen(ctx, 1, 0, t.n_threads);
            }
            for (int i = 0; i < params.reps; i++) {
                uint64_t t_start = get_time_ns();