mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
llama-bench : add JSONL (NDJSON) output mode (#9288)
* llama-bench : add JSONL (NDJSON) output mode * llama-bench : update usage docs
This commit is contained in:
parent
b69a480af4
commit
8962422b1c
@ -14,7 +14,8 @@ Performance testing tool for llama.cpp.
|
|||||||
1. [Markdown](#markdown)
|
1. [Markdown](#markdown)
|
||||||
2. [CSV](#csv)
|
2. [CSV](#csv)
|
||||||
3. [JSON](#json)
|
3. [JSON](#json)
|
||||||
4. [SQL](#sql)
|
4. [JSONL](#jsonl)
|
||||||
|
5. [SQL](#sql)
|
||||||
|
|
||||||
## Syntax
|
## Syntax
|
||||||
|
|
||||||
@ -26,13 +27,17 @@ options:
|
|||||||
-m, --model <filename> (default: models/7B/ggml-model-q4_0.gguf)
|
-m, --model <filename> (default: models/7B/ggml-model-q4_0.gguf)
|
||||||
-p, --n-prompt <n> (default: 512)
|
-p, --n-prompt <n> (default: 512)
|
||||||
-n, --n-gen <n> (default: 128)
|
-n, --n-gen <n> (default: 128)
|
||||||
-pg <pp,tg> (default: 512,128)
|
-pg <pp,tg> (default: )
|
||||||
-b, --batch-size <n> (default: 2048)
|
-b, --batch-size <n> (default: 2048)
|
||||||
-ub, --ubatch-size <n> (default: 512)
|
-ub, --ubatch-size <n> (default: 512)
|
||||||
-ctk, --cache-type-k <t> (default: f16)
|
-ctk, --cache-type-k <t> (default: f16)
|
||||||
-ctv, --cache-type-v <t> (default: f16)
|
-ctv, --cache-type-v <t> (default: f16)
|
||||||
-t, --threads <n> (default: 16)
|
-t, --threads <n> (default: 8)
|
||||||
|
-C, --cpu-mask <hex,hex> (default: 0x0)
|
||||||
|
--cpu-strict <0|1> (default: 0)
|
||||||
|
--poll <0...100> (default: 50)
|
||||||
-ngl, --n-gpu-layers <n> (default: 99)
|
-ngl, --n-gpu-layers <n> (default: 99)
|
||||||
|
-rpc, --rpc <rpc_servers> (default: )
|
||||||
-sm, --split-mode <none|layer|row> (default: layer)
|
-sm, --split-mode <none|layer|row> (default: layer)
|
||||||
-mg, --main-gpu <i> (default: 0)
|
-mg, --main-gpu <i> (default: 0)
|
||||||
-nkvo, --no-kv-offload <0|1> (default: 0)
|
-nkvo, --no-kv-offload <0|1> (default: 0)
|
||||||
@ -42,7 +47,10 @@ options:
|
|||||||
-embd, --embeddings <0|1> (default: 0)
|
-embd, --embeddings <0|1> (default: 0)
|
||||||
-ts, --tensor-split <ts0/ts1/..> (default: 0)
|
-ts, --tensor-split <ts0/ts1/..> (default: 0)
|
||||||
-r, --repetitions <n> (default: 5)
|
-r, --repetitions <n> (default: 5)
|
||||||
-o, --output <csv|json|md|sql> (default: md)
|
--prio <0|1|2|3> (default: 0)
|
||||||
|
--delay <0...N> (seconds) (default: 0)
|
||||||
|
-o, --output <csv|json|jsonl|md|sql> (default: md)
|
||||||
|
-oe, --output-err <csv|json|jsonl|md|sql> (default: none)
|
||||||
-v, --verbose (default: 0)
|
-v, --verbose (default: 0)
|
||||||
|
|
||||||
Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.
|
Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.
|
||||||
@ -238,6 +246,19 @@ $ ./llama-bench -o json
|
|||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### JSONL
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ ./llama-bench -o jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
```json lines
|
||||||
|
{"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":512,"n_gen":0,"test_time":"2023-09-23T12:09:57Z","avg_ns":212365953,"stddev_ns":985423,"avg_ts":2410.974041,"stddev_ts":11.163766,"samples_ns":[213837238,211635853,212328053,211329715,212698907],"samples_ts":[2394.34,2419.25,2411.36,2422.75,2407.16]}
|
||||||
|
{"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":0,"n_gen":128,"test_time":"2023-09-23T12:09:59Z","avg_ns":977425219,"stddev_ns":9268593,"avg_ts":130.965708,"stddev_ts":1.238924,"samples_ns":[984472709,974901233,989474741,970729355,967548060],"samples_ts":[130.019,131.295,129.362,131.86,132.293]}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### SQL
|
### SQL
|
||||||
|
|
||||||
SQL output is suitable for importing into a SQLite database. The output can be piped into the `sqlite3` command line tool to add the results to a database.
|
SQL output is suitable for importing into a SQLite database. The output can be piped into the `sqlite3` command line tool to add the results to a database.
|
||||||
|
@ -171,13 +171,14 @@ static std::string get_gpu_info() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// command line params
|
// command line params
|
||||||
enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};
|
// Output formats that can be selected for the result printers (see the
// -o/--output and -oe/--output-err options in print_usage). NONE is rendered
// as "none" by output_format_str; JSONL is the JSON Lines mode.
enum output_formats {NONE, CSV, JSON, JSONL, MARKDOWN, SQL};
|
||||||
|
|
||||||
static const char * output_format_str(output_formats format) {
|
static const char * output_format_str(output_formats format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case NONE: return "none";
|
case NONE: return "none";
|
||||||
case CSV: return "csv";
|
case CSV: return "csv";
|
||||||
case JSON: return "json";
|
case JSON: return "json";
|
||||||
|
case JSONL: return "jsonl";
|
||||||
case MARKDOWN: return "md";
|
case MARKDOWN: return "md";
|
||||||
case SQL: return "sql";
|
case SQL: return "sql";
|
||||||
default: GGML_ABORT("invalid output format");
|
default: GGML_ABORT("invalid output format");
|
||||||
@ -191,6 +192,8 @@ static bool output_format_from_str(const std::string & s, output_formats & forma
|
|||||||
format = CSV;
|
format = CSV;
|
||||||
} else if (s == "json") {
|
} else if (s == "json") {
|
||||||
format = JSON;
|
format = JSON;
|
||||||
|
} else if (s == "jsonl") {
|
||||||
|
format = JSONL;
|
||||||
} else if (s == "md") {
|
} else if (s == "md") {
|
||||||
format = MARKDOWN;
|
format = MARKDOWN;
|
||||||
} else if (s == "sql") {
|
} else if (s == "sql") {
|
||||||
@ -308,8 +311,8 @@ static void print_usage(int /* argc */, char ** argv) {
|
|||||||
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
||||||
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
|
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
|
||||||
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
|
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
|
||||||
printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
||||||
printf(" -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
|
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
|
||||||
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
|
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
|
||||||
@ -1074,8 +1077,6 @@ struct csv_printer : public printer {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct json_printer : public printer {
|
|
||||||
bool first = true;
|
|
||||||
|
|
||||||
static std::string escape_json(const std::string & value) {
|
static std::string escape_json(const std::string & value) {
|
||||||
std::string escaped;
|
std::string escaped;
|
||||||
@ -1095,7 +1096,7 @@ struct json_printer : public printer {
|
|||||||
return escaped;
|
return escaped;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string format_value(const std::string & field, const std::string & value) {
|
static std::string format_json_value(const std::string & field, const std::string & value) {
|
||||||
switch (test::get_field_type(field)) {
|
switch (test::get_field_type(field)) {
|
||||||
case test::STRING:
|
case test::STRING:
|
||||||
return "\"" + escape_json(value) + "\"";
|
return "\"" + escape_json(value) + "\"";
|
||||||
@ -1106,6 +1107,9 @@ struct json_printer : public printer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct json_printer : public printer {
|
||||||
|
bool first = true;
|
||||||
|
|
||||||
void print_header(const cmd_params & params) override {
|
void print_header(const cmd_params & params) override {
|
||||||
fprintf(fout, "[\n");
|
fprintf(fout, "[\n");
|
||||||
(void) params;
|
(void) params;
|
||||||
@ -1114,7 +1118,7 @@ struct json_printer : public printer {
|
|||||||
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
||||||
assert(fields.size() == values.size());
|
assert(fields.size() == values.size());
|
||||||
for (size_t i = 0; i < fields.size(); i++) {
|
for (size_t i = 0; i < fields.size(); i++) {
|
||||||
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_value(fields.at(i), values.at(i)).c_str());
|
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1137,6 +1141,25 @@ struct json_printer : public printer {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Printer for the JSONL (JSON Lines / NDJSON) output mode: each test result is
// written as one JSON object terminated by a newline. This struct defines no
// header or footer output of its own.
// NOTE(review): fout is not declared here - presumably inherited from printer; confirm.
struct jsonl_printer : public printer {
|
||||||
|
// Writes every field as `"name": value, ` on the current line. The trailing
// ", " after the last field is intentional: print_test always appends the
// "samples_ns" key right after this call. fields and values must have the
// same length (checked by assert).
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
||||||
|
assert(fields.size() == values.size());
|
||||||
|
for (size_t i = 0; i < fields.size(); i++) {
|
||||||
|
// format_json_value renders the value according to the field's type
// (STRING fields are quoted and escaped).
fprintf(fout, "\"%s\": %s, ", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits one complete record for test t: '{', all test fields, the
// "samples_ns" and "samples_ts" arrays, then '}' and a newline.
void print_test(const test & t) override {
|
||||||
|
fprintf(fout, "{");
|
||||||
|
print_fields(test::get_fields(), t.get_values());
|
||||||
|
fprintf(fout, "\"samples_ns\": [ %s ],", join(t.samples_ns, ", ").c_str());
|
||||||
|
fprintf(fout, "\"samples_ts\": [ %s ]", join(t.get_ts(), ", ").c_str());
|
||||||
|
fprintf(fout, "}\n");
|
||||||
|
// Flush after every record - presumably so a consumer can read results
// incrementally while the benchmark is still running.
fflush(fout);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct markdown_printer : public printer {
|
struct markdown_printer : public printer {
|
||||||
std::vector<std::string> fields;
|
std::vector<std::string> fields;
|
||||||
|
|
||||||
@ -1437,6 +1460,8 @@ static std::unique_ptr<printer> create_printer(output_formats format) {
|
|||||||
return std::unique_ptr<printer>(new csv_printer());
|
return std::unique_ptr<printer>(new csv_printer());
|
||||||
case JSON:
|
case JSON:
|
||||||
return std::unique_ptr<printer>(new json_printer());
|
return std::unique_ptr<printer>(new json_printer());
|
||||||
|
case JSONL:
|
||||||
|
return std::unique_ptr<printer>(new jsonl_printer());
|
||||||
case MARKDOWN:
|
case MARKDOWN:
|
||||||
return std::unique_ptr<printer>(new markdown_printer());
|
return std::unique_ptr<printer>(new markdown_printer());
|
||||||
case SQL:
|
case SQL:
|
||||||
|
Loading…
Reference in New Issue
Block a user