diff --git a/common/arg.cpp b/common/arg.cpp
index ca1534c98..c49b07cc8 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1961,7 +1961,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
         }
     ));
     add_opt(llama_arg(
-        {"-lv", "--verbosity", "--log-verbosity"}, "THOLD",
+        {"-lv", "--verbosity", "--log-verbosity"}, "N",
         "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
         [](gpt_params & params, int value) {
             params.verbosity = value;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 61da020a8..8d5c1fdd1 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2304,7 +2304,9 @@ int main(int argc, char ** argv) {
 
     gpt_init();
 
-    const bool verbose = params.verbosity > 0;
+    // enabling this will output extra debug information in the HTTP responses from the server
+    // see format_final_response_oaicompat()
+    const bool verbose = params.verbosity > 9;
 
     // struct that contains llama context and inference
     server_context ctx_server;
@@ -2830,7 +2832,7 @@ int main(int argc, char ** argv) {
         if (!stream) {
            ctx_server.receive_cmpl_results(task_ids, [&](const std::vector<server_task_result> & results) {
                // multitask is never support in chat completion, there is only one result
-               json result_oai = format_final_response_oaicompat(data, results[0].data, completion_id, verbose);
+               json result_oai = format_final_response_oaicompat(data, results[0].data, completion_id, /*.streaming =*/ false, verbose);
                res_ok(res, result_oai);
            }, [&](const json & error_data) {
                res_error(res, error_data);
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index aad86cb62..537c8a223 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -408,6 +408,7 @@ static json format_final_response_oaicompat(const json & request, const json & r
         {"id", completion_id}
     };
 
+    // extra fields for debugging purposes
    if (verbose) {
        res["__verbose"] = result;
    }
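The server.cpp and utils.hpp hunks combine so that the extra `__verbose` debug field is only attached to the OAI-compatible response when the threshold passed via `-lv` / `--log-verbosity` is greater than 9, instead of any value above 0 as before. A minimal standalone sketch of that gating, assuming nlohmann::json (which examples/server already bundles) and made-up result values:

```cpp
// Illustrative sketch only; the real logic lives in format_final_response_oaicompat().
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // hypothetical value passed on the command line via -lv / --log-verbosity
    const int  verbosity = 10;
    // with this patch, verbose debug output requires a verbosity greater than 9
    const bool verbose   = verbosity > 9;

    // made-up inference result and OAI-compatible response skeleton
    json result = {{"content", "hello"}, {"tokens_predicted", 2}};
    json res    = {{"object", "chat.completion"}, {"id", "chatcmpl-123"}};

    // extra fields for debugging purposes
    if (verbose) {
        res["__verbose"] = result;
    }

    std::cout << res.dump(2) << std::endl;
    return 0;
}
```

With the patch applied, passing `--log-verbosity 10` (or higher) to the server enables this extra output in non-streaming chat completion responses; lower verbosity values leave the response unchanged.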