From 50fae10d0339f2bd639f69dd679c0201d939a265 Mon Sep 17 00:00:00 2001 From: slaren <2141330+slaren@users.noreply.github.com> Date: Sun, 19 Mar 2023 19:22:48 +0100 Subject: [PATCH] Add --ignore-eos parameter (#181) Co-authored-by: Georgi Gerganov --- main.cpp | 10 +++++++++- utils.cpp | 3 +++ utils.h | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 8e95c23d5..e18105624 100644 --- a/main.cpp +++ b/main.cpp @@ -27,6 +27,8 @@ #define ANSI_COLOR_RESET "\x1b[0m" #define ANSI_BOLD "\x1b[1m" +static const int EOS_TOKEN_ID = 2; + // determine number of model parts based on the dimension static const std::map<int, int> LLAMA_N_PARTS = { { 4096, 1 }, @@ -956,6 +958,11 @@ int main(int argc, char ** argv) { { const int64_t t_start_sample_us = ggml_time_us(); + if (params.ignore_eos) { + // set the logit of the eos token to zero to avoid sampling it + logits[logits.size() - n_vocab + EOS_TOKEN_ID] = 0; + } + id = llama_sample_top_p_top_k(vocab, logits.data() + (logits.size() - n_vocab), last_n_tokens, repeat_penalty, top_k, top_p, temp, rng); last_n_tokens.erase(last_n_tokens.begin()); @@ -1055,7 +1062,8 @@ int main(int argc, char ** argv) { } // end of text token - if (embd.back() == 2) { + + if (embd.back() == EOS_TOKEN_ID) { if (params.interactive) { is_interacting = true; } else { diff --git a/utils.cpp b/utils.cpp index 99cb30bb9..a4135b9fd 100644 --- a/utils.cpp +++ b/utils.cpp @@ -71,6 +71,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.use_color = true; } else if (arg == "-r" || arg == "--reverse-prompt") { params.antiprompt = argv[++i]; + } else if (arg == "--ignore-eos") { + params.ignore_eos = true; } else if (arg == "-h" || arg == "--help") { gpt_print_usage(argc, argv, params); exit(0); @@ -106,6 +108,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --repeat_last_n N last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n); 
fprintf(stderr, " --repeat_penalty N penalize repeat sequence of tokens (default: %.1f)\n", params.repeat_penalty); fprintf(stderr, " -c N, --ctx_size N size of the prompt context (default: %d)\n", params.n_ctx); + fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating\n"); fprintf(stderr, " --memory_f16 use f16 instead of f32 for memory key+value\n"); fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); diff --git a/utils.h b/utils.h index c68e4cba8..21325191a 100644 --- a/utils.h +++ b/utils.h @@ -36,6 +36,8 @@ struct gpt_params { bool interactive = false; // interactive mode bool instruct = false; // instruction mode (used for Alpaca models) + + bool ignore_eos = false; // do not stop generating after eos }; bool gpt_params_parse(int argc, char ** argv, gpt_params & params);