mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
llama-bench : log benchmark progress (#9287)
* llama-bench : add optional progress messages
This commit is contained in:
parent
815b1fb20a
commit
134bc38ecf
@ -249,6 +249,7 @@ struct cmd_params {
|
|||||||
ggml_sched_priority prio;
|
ggml_sched_priority prio;
|
||||||
int delay;
|
int delay;
|
||||||
bool verbose;
|
bool verbose;
|
||||||
|
bool progress;
|
||||||
output_formats output_format;
|
output_formats output_format;
|
||||||
output_formats output_format_stderr;
|
output_formats output_format_stderr;
|
||||||
};
|
};
|
||||||
@ -280,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
|
|||||||
/* prio */ GGML_SCHED_PRIO_NORMAL,
|
/* prio */ GGML_SCHED_PRIO_NORMAL,
|
||||||
/* delay */ 0,
|
/* delay */ 0,
|
||||||
/* verbose */ false,
|
/* verbose */ false,
|
||||||
|
/* progress */ false,
|
||||||
/* output_format */ MARKDOWN,
|
/* output_format */ MARKDOWN,
|
||||||
/* output_format_stderr */ NONE,
|
/* output_format_stderr */ NONE,
|
||||||
};
|
};
|
||||||
@ -319,6 +321,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
|||||||
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
||||||
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
|
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
|
||||||
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
||||||
|
printf(" --progress (default: %s)\n", cmd_params_defaults.progress ? "1" : "0");
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
|
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
|
||||||
}
|
}
|
||||||
@ -364,6 +367,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||||||
params.numa = cmd_params_defaults.numa;
|
params.numa = cmd_params_defaults.numa;
|
||||||
params.prio = cmd_params_defaults.prio;
|
params.prio = cmd_params_defaults.prio;
|
||||||
params.delay = cmd_params_defaults.delay;
|
params.delay = cmd_params_defaults.delay;
|
||||||
|
params.progress = cmd_params_defaults.progress;
|
||||||
|
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
arg = argv[i];
|
arg = argv[i];
|
||||||
@ -616,6 +620,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||||||
invalid_param = !output_format_from_str(argv[i], params.output_format_stderr);
|
invalid_param = !output_format_from_str(argv[i], params.output_format_stderr);
|
||||||
} else if (arg == "-v" || arg == "--verbose") {
|
} else if (arg == "-v" || arg == "--verbose") {
|
||||||
params.verbose = true;
|
params.verbose = true;
|
||||||
|
} else if (arg == "--progress") {
|
||||||
|
params.progress = true;
|
||||||
} else {
|
} else {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
@ -1523,7 +1529,13 @@ int main(int argc, char ** argv) {
|
|||||||
llama_model * lmodel = nullptr;
|
llama_model * lmodel = nullptr;
|
||||||
const cmd_params_instance * prev_inst = nullptr;
|
const cmd_params_instance * prev_inst = nullptr;
|
||||||
|
|
||||||
|
int params_idx = 0;
|
||||||
|
auto params_count = params_instances.size();
|
||||||
for (const auto & inst : params_instances) {
|
for (const auto & inst : params_instances) {
|
||||||
|
params_idx ++;
|
||||||
|
if (params.progress) {
|
||||||
|
fprintf(stderr, "llama-bench: benchmark %d/%ld: starting\n", params_idx, params_count);
|
||||||
|
}
|
||||||
// keep the same model between tests when possible
|
// keep the same model between tests when possible
|
||||||
if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
|
if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
|
||||||
if (lmodel) {
|
if (lmodel) {
|
||||||
@ -1556,7 +1568,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
|
struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
|
||||||
if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) {
|
if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) {
|
||||||
LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
|
fprintf(stderr, "%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
tpp.strict_cpu = t.cpu_strict;
|
tpp.strict_cpu = t.cpu_strict;
|
||||||
@ -1565,7 +1577,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp);
|
struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp);
|
||||||
if (!threadpool) {
|
if (!threadpool) {
|
||||||
LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
|
fprintf(stderr, "%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1573,10 +1585,16 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// warmup run
|
// warmup run
|
||||||
if (t.n_prompt > 0) {
|
if (t.n_prompt > 0) {
|
||||||
|
if (params.progress) {
|
||||||
|
fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
|
||||||
|
}
|
||||||
//test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
|
//test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
|
||||||
test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
|
test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
|
||||||
}
|
}
|
||||||
if (t.n_gen > 0) {
|
if (t.n_gen > 0) {
|
||||||
|
if (params.progress) {
|
||||||
|
fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
|
||||||
|
}
|
||||||
test_gen(ctx, 1, 0, t.n_threads);
|
test_gen(ctx, 1, 0, t.n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1586,9 +1604,15 @@ int main(int argc, char ** argv) {
|
|||||||
uint64_t t_start = get_time_ns();
|
uint64_t t_start = get_time_ns();
|
||||||
|
|
||||||
if (t.n_prompt > 0) {
|
if (t.n_prompt > 0) {
|
||||||
|
if (params.progress) {
|
||||||
|
fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps);
|
||||||
|
}
|
||||||
test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
|
test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
|
||||||
}
|
}
|
||||||
if (t.n_gen > 0) {
|
if (t.n_gen > 0) {
|
||||||
|
if (params.progress) {
|
||||||
|
fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps);
|
||||||
|
}
|
||||||
test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
|
test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user