llama-bench : add JSONL (NDJSON) output mode (#9288)

* llama-bench : add JSONL (NDJSON) output mode * llama-bench : update usage docs
2024-12-25 02:44:36 +00:00 · 2024-09-03 20:58:54 +03:00 · 2024-09-03 20:58:54 +03:00 · 8962422b1c
commit 8962422b1c
parent b69a480af4
2 changed files with 127 additions and 81 deletions
--- a/examples/llama-bench/README.md
+++ b/examples/llama-bench/README.md
@ -14,7 +14,8 @@ Performance testing tool for llama.cpp.
    1. [Markdown](#markdown)
    2. [CSV](#csv)
    3. [JSON](#json)
-    4. [SQL](#sql)
+    4. [JSONL](#jsonl)
    5. [SQL](#sql)
 ## Syntax
@ -26,13 +27,17 @@ options:
  -m, --model <filename>                    (default: models/7B/ggml-model-q4_0.gguf)
  -p, --n-prompt <n>                        (default: 512)
  -n, --n-gen <n>                           (default: 128)
-  -pg <pp,tg>                         (default: 512,128)
+  -pg <pp,tg>                               (default: )
  -b, --batch-size <n>                      (default: 2048)
  -ub, --ubatch-size <n>                    (default: 512)
  -ctk, --cache-type-k <t>                  (default: f16)
  -ctv, --cache-type-v <t>                  (default: f16)
-  -t, --threads <n>                   (default: 16)
+  -t, --threads <n>                         (default: 8)
  -C, --cpu-mask <hex,hex>                  (default: 0x0)
  --cpu-strict <0|1>                        (default: 0)
  --poll <0...100>                          (default: 50)
  -ngl, --n-gpu-layers <n>                  (default: 99)
  -rpc, --rpc <rpc_servers>                 (default: )
  -sm, --split-mode <none|layer|row>        (default: layer)
  -mg, --main-gpu <i>                       (default: 0)
  -nkvo, --no-kv-offload <0|1>              (default: 0)
@ -42,7 +47,10 @@ options:
  -embd, --embeddings <0|1>                 (default: 0)
  -ts, --tensor-split <ts0/ts1/..>          (default: 0)
  -r, --repetitions <n>                     (default: 5)
-  -o, --output <csv|json|md|sql>      (default: md)
+  --prio <0|1|2|3>                          (default: 0)
  --delay <0...N> (seconds)                 (default: 0)
  -o, --output <csv|json|jsonl|md|sql>      (default: md)
  -oe, --output-err <csv|json|jsonl|md|sql> (default: none)
  -v, --verbose                             (default: 0)
 Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.
@ -238,6 +246,19 @@ $ ./llama-bench -o json
 ]
 ```
 ### JSONL
 ```sh
 $ ./llama-bench -o jsonl
 ```
 ```json lines
 {"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":512,"n_gen":0,"test_time":"2023-09-23T12:09:57Z","avg_ns":212365953,"stddev_ns":985423,"avg_ts":2410.974041,"stddev_ts":11.163766,"samples_ns":[213837238,211635853,212328053,211329715,212698907],"samples_ts":[2394.34,2419.25,2411.36,2422.75,2407.16]}
 {"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":0,"n_gen":128,"test_time":"2023-09-23T12:09:59Z","avg_ns":977425219,"stddev_ns":9268593,"avg_ts":130.965708,"stddev_ts":1.238924,"samples_ns":[984472709,974901233,989474741,970729355,967548060],"samples_ts":[130.019,131.295,129.362,131.86,132.293]}
 ```
 ### SQL
 SQL output is suitable for importing into a SQLite database. The output can be piped into the `sqlite3` command line tool to add the results to a database.
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@ -171,13 +171,14 @@ static std::string get_gpu_info() {
 }
 // command line params
-enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};
+enum output_formats {NONE, CSV, JSON, JSONL, MARKDOWN, SQL};
 static const char * output_format_str(output_formats format) {
    switch (format) {
        case NONE:     return "none";
        case CSV:      return "csv";
        case JSON:     return "json";
        case JSONL:    return "jsonl";
        case MARKDOWN: return "md";
        case SQL:      return "sql";
        default: GGML_ABORT("invalid output format");
@ -191,6 +192,8 @@ static bool output_format_from_str(const std::string & s, output_formats & forma
        format = CSV;
    } else if (s == "json") {
        format = JSON;
    } else if (s == "jsonl") {
        format = JSONL;
    } else if (s == "md") {
        format = MARKDOWN;
    } else if (s == "sql") {
@ -308,8 +311,8 @@ static void print_usage(int /* argc */, char ** argv) {
    printf("  -r, --repetitions <n>                     (default: %d)\n", cmd_params_defaults.reps);
    printf("  --prio <0|1|2|3>                          (default: %d)\n", cmd_params_defaults.prio);
    printf("  --delay <0...N> (seconds)                 (default: %d)\n", cmd_params_defaults.delay);
-    printf("  -o, --output <csv|json|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
+    printf("  -o, --output <csv|json|jsonl|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
-    printf("  -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
+    printf("  -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
    printf("  -v, --verbose                             (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
    printf("\n");
    printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
@ -1074,10 +1077,8 @@ struct csv_printer : public printer {
    }
 };
 struct json_printer : public printer {
    bool first = true;
-    static std::string escape_json(const std::string & value) {
+static std::string escape_json(const std::string & value) {
    std::string escaped;
    for (auto c : value) {
        if (c == '"') {
@ -1093,9 +1094,9 @@ struct json_printer : public printer {
        }
    }
    return escaped;
-    }
+}
-    static std::string format_value(const std::string & field, const std::string & value) {
+static std::string format_json_value(const std::string & field, const std::string & value) {
    switch (test::get_field_type(field)) {
        case test::STRING:
            return "\"" + escape_json(value) + "\"";
@ -1104,7 +1105,10 @@ struct json_printer : public printer {
        default:
            return value;
    }
-    }
+}
 struct json_printer : public printer {
    bool first = true;
    void print_header(const cmd_params & params) override {
        fprintf(fout, "[\n");
@ -1114,7 +1118,7 @@ struct json_printer : public printer {
    void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
        assert(fields.size() == values.size());
        for (size_t i = 0; i < fields.size(); i++) {
-            fprintf(fout, "    \"%s\": %s,\n", fields.at(i).c_str(), format_value(fields.at(i), values.at(i)).c_str());
+            fprintf(fout, "    \"%s\": %s,\n", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
        }
    }
@ -1137,6 +1141,25 @@ struct json_printer : public printer {
    }
 };
 struct jsonl_printer : public printer {
    void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
        assert(fields.size() == values.size());
        for (size_t i = 0; i < fields.size(); i++) {
            fprintf(fout, "\"%s\": %s, ", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
        }
    }
    void print_test(const test & t) override {
        fprintf(fout, "{");
        print_fields(test::get_fields(), t.get_values());
        fprintf(fout, "\"samples_ns\": [ %s ],", join(t.samples_ns, ", ").c_str());
        fprintf(fout, "\"samples_ts\": [ %s ]", join(t.get_ts(), ", ").c_str());
        fprintf(fout, "}\n");
        fflush(fout);
    }
 };
 struct markdown_printer : public printer {
    std::vector<std::string> fields;
@ -1437,6 +1460,8 @@ static std::unique_ptr<printer> create_printer(output_formats format) {
            return std::unique_ptr<printer>(new csv_printer());
        case JSON:
            return std::unique_ptr<printer>(new json_printer());
        case JSONL:
            return std::unique_ptr<printer>(new jsonl_printer());
        case MARKDOWN:
            return std::unique_ptr<printer>(new markdown_printer());
        case SQL: