main : add parameter --no-display-prompt (#4541)

* add the parameter --no-display-prompt; when combined with --log-disable, only the generated tokens are displayed * remove empty line --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-12-26 03:14:35 +00:00 · 2024-01-14 00:09:08 +08:00 · 2024-01-14 00:09:08 +08:00 · 722d33f34e
commit 722d33f34e
parent c30b1ef39a
3 changed files with 12 additions and 2 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -617,6 +617,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
            params.numa = true;
        } else if (arg == "--verbose-prompt") {
            params.verbose_prompt = true;
        } else if (arg == "--no-display-prompt") {
            params.display_prompt = false;
        } else if (arg == "-r" || arg == "--reverse-prompt") {
            if (++i >= argc) {
                invalid_param = true;
@ -936,11 +938,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    printf("  -mg i, --main-gpu i   the GPU to use for the model (with split-mode = none),\n");
    printf("                        or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu);
 #endif
    printf("  --verbose-prompt      print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false");
    printf("  --no-display-prompt   don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false");
    printf("  -gan N, --grp-attn-n N\n");
    printf("                        group-attention factor (default: %d)\n", params.grp_attn_n);
    printf("  -gaw N, --grp-attn-w N\n");
    printf("                        group-attention width (default: %.1f)\n", (double)params.grp_attn_w);
    printf("  --verbose-prompt      print prompt before generation\n");
    printf("  -dkvc, --dump-kv-cache\n");
    printf("                        verbose print of the KV cache\n");
    printf("  -nkvo, --no-kv-offload\n");
@ -1582,6 +1585,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
    fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
    fprintf(stream, "typical_p: %f # default: 1.0\n", sparams.typical_p);
    fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
    fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");
 }
 //
--- a/common/common.h
+++ b/common/common.h
@ -126,6 +126,7 @@ struct gpt_params {
    bool use_mlock         = false; // use mlock to keep model in memory
    bool numa              = false; // attempt optimizations that help on some NUMA systems
    bool verbose_prompt    = false; // print prompt tokens before generation
    bool display_prompt    = true;  // print prompt before generation
    bool infill            = false; // use infill mode
    bool dump_kv_cache     = false; // dump the KV cache contents for debugging purposes
    bool no_kv_offload     = false; // disable KV offloading
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -477,6 +477,7 @@ int main(int argc, char ** argv) {
    bool is_antiprompt        = false;
    bool input_echo           = true;
    bool display              = true;
    bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < embd_inp.size();
    int n_past             = 0;
@ -491,6 +492,7 @@ int main(int argc, char ** argv) {
    // the first thing we will do is to output the prompt, so set color accordingly
    console::set_display(console::prompt);
    display = params.display_prompt;
    std::vector<llama_token> embd;
    std::vector<llama_token> embd_guidance;
@ -707,7 +709,7 @@ int main(int argc, char ** argv) {
        }
        // display text
-        if (input_echo) {
+        if (input_echo && display) {
            for (auto id : embd) {
                const std::string token_str = llama_token_to_piece(ctx, id);
                printf("%s", token_str.c_str());
@ -724,6 +726,7 @@ int main(int argc, char ** argv) {
        // reset color to default if there is no pending user input
        if (input_echo && (int) embd_inp.size() == n_consumed) {
            console::set_display(console::reset);
            display = true;
        }
        // if not currently processing queued inputs;
@ -796,6 +799,7 @@ int main(int argc, char ** argv) {
                // color user input only
                console::set_display(console::user_input);
                display = params.display_prompt;
                std::string line;
                bool another_line = true;
@ -806,6 +810,7 @@ int main(int argc, char ** argv) {
                // done taking input, reset color
                console::set_display(console::reset);
                display = true;
                // Add tokens to embd only if the input buffer is non-empty
                // Entering a empty line lets the user pass control back