diff --git a/common/arg.cpp b/common/arg.cpp
index ca1534c98..c49b07cc8 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1961,7 +1961,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
         }
     ));
     add_opt(llama_arg(
-        {"-lv", "--verbosity", "--log-verbosity"}, "THOLD",
+        {"-lv", "--verbosity", "--log-verbosity"}, "N",
         "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
         [](gpt_params & params, int value) {
             params.verbosity = value;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 61da020a8..8d5c1fdd1 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2304,7 +2304,9 @@ int main(int argc, char ** argv) {
 
     gpt_init();
 
-    const bool verbose = params.verbosity > 0;
+    // enabling this will output extra debug information in the HTTP responses from the server
+    // see format_final_response_oaicompat()
+    const bool verbose = params.verbosity > 9;
 
     // struct that contains llama context and inference
     server_context ctx_server;
@@ -2830,7 +2832,7 @@ int main(int argc, char ** argv) {
         if (!stream) {
            ctx_server.receive_cmpl_results(task_ids, [&](const std::vector<server_task_result> & results) {
                // multitask is never support in chat completion, there is only one result
-               json result_oai = format_final_response_oaicompat(data, results[0].data, completion_id, verbose);
+               json result_oai = format_final_response_oaicompat(data, results[0].data, completion_id, /*.streaming =*/ false, verbose);
                res_ok(res, result_oai);
            }, [&](const json & error_data) {
                res_error(res, error_data);
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index aad86cb62..537c8a223 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -408,6 +408,7 @@ static json format_final_response_oaicompat(const json & request, const json & r
         {"id", completion_id}
     };
 
+    // extra fields for debugging purposes
    if (verbose) {
        res["__verbose"] = result;
    }
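The server.cpp and utils.hpp hunks combine so that the extra `__verbose` debug field is only attached to the OAI-compatible response when the threshold passed via `-lv` / `--log-verbosity` is greater than 9, instead of any value above 0 as before. A minimal standalone sketch of that gating, assuming nlohmann::json (which examples/server already bundles) and made-up result values:

```cpp
// Illustrative sketch only; the real logic lives in format_final_response_oaicompat().
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // hypothetical value passed on the command line via -lv / --log-verbosity
    const int  verbosity = 10;
    // with this patch, verbose debug output requires a verbosity greater than 9
    const bool verbose   = verbosity > 9;

    // made-up inference result and OAI-compatible response skeleton
    json result = {{"content", "hello"}, {"tokens_predicted", 2}};
    json res    = {{"object", "chat.completion"}, {"id", "chatcmpl-123"}};

    // extra fields for debugging purposes
    if (verbose) {
        res["__verbose"] = result;
    }

    std::cout << res.dump(2) << std::endl;
    return 0;
}
```

With the patch applied, passing `--log-verbosity 10` (or higher) to the server enables this extra output in non-streaming chat completion responses; lower verbosity values leave the response unchanged.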