Immediately start processing the prompt before user input has been provided (#476)

Georgi Gerganov 2023-03-24 23:17:58 +02:00 committed by GitHub
parent 7a9b6c3a8b
commit 04c6f5ed6f
4 changed files with 13 additions and 7 deletions

View File

@@ -3,4 +3,4 @@
 # Temporary script - will be removed in the future
 #
-./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins -b 256 --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
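Note: the `-b 256` added here (like `-b 128` and `--batch_size 1024` in the next two files) raises the batch size used while ingesting the prompt, so the fixed prompt can be evaluated in larger chunks before the user has typed anything. A minimal sketch of what the batch size controls, assuming the `llama_eval()` C API of this era; the `eval_prompt` helper itself is hypothetical, not part of this commit:

#include <algorithm>
#include <vector>
#include "llama.h"

// Hypothetical helper: evaluate the whole prompt in n_batch-sized chunks.
static bool eval_prompt(llama_context * ctx, const std::vector<llama_token> & prompt,
                        int & n_past, int n_batch, int n_threads) {
    for (int i = 0; i < (int) prompt.size(); i += n_batch) {
        const int n_eval = std::min(n_batch, (int) prompt.size() - i);
        // one llama_eval() call per chunk; a larger n_batch means fewer calls
        if (llama_eval(ctx, prompt.data() + i, n_eval, n_past, n_threads) != 0) {
            return false; // evaluation failed
        }
        n_past += n_eval; // tokens evaluated so far
    }
    return true;
}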

View File

@@ -3,4 +3,4 @@
 # Temporary script - will be removed in the future
 #
-./main -m ./models/7B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/7B/ggml-model-q4_0.bin -b 128 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt

View File

@@ -13,7 +13,7 @@ N_PREDICTS="${N_PREDICTS:-2048}"
 # Note: you can also override the generation options by specifying them on the command line:
 # For example, override the context size by doing: ./chatLLaMa --ctx_size 1024
-GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --repeat_penalty 1.17647}"
+GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}"
 # shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
 ./main $GEN_OPTIONS \

View File

@@ -372,7 +372,7 @@ int main(int argc, char ** argv) {
             n_past += embd.size();
             embd.clear();
 
-            if ((int) embd_inp.size() <= input_consumed) {
+            if ((int) embd_inp.size() <= input_consumed && !is_interacting) {
                 // out of user input, sample next token
                 const float top_k = params.top_k;
                 const float top_p = params.top_p;
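The `&& !is_interacting` guard is the heart of the change: sampling now requires both that the prompt is fully consumed and that the program is not waiting on the user, so prompt evaluation can begin immediately at startup even when interactive mode has already raised `is_interacting`. A self-contained toy trace of that guard (invented token values; not the real loop):

#include <cstdio>
#include <vector>

// Toy simulation of the guard above: sampling happens only when the prompt
// is fully consumed AND we are not waiting on the user. Even though
// is_interacting starts out true, the prompt is still consumed immediately,
// which is the point of this commit.
int main() {
    std::vector<int> embd_inp = {101, 102, 103}; // pretend prompt tokens
    size_t input_consumed = 0;
    bool is_interacting = true; // e.g. instruct mode requests input up front

    for (int step = 0; step < 6; ++step) {
        if (embd_inp.size() <= input_consumed && !is_interacting) {
            printf("step %d: sample next token\n", step);
        } else if (input_consumed < embd_inp.size()) {
            printf("step %d: consume prompt token %d\n", step, embd_inp[input_consumed]);
            ++input_consumed;
        } else {
            printf("step %d: prompt done, waiting for user input\n", step);
            is_interacting = false; // user submitted their input
        }
    }
    return 0;
}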
@@ -451,13 +451,16 @@ int main(int argc, char ** argv) {
                     }
 
                     // Check if each of the reverse prompts appears at the end of the output.
-                    for (std::string antiprompt : params.antiprompt) {
+                    for (std::string & antiprompt : params.antiprompt) {
                         if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) {
                             is_interacting = true;
+                            set_console_state(CONSOLE_STATE_USER_INPUT);
+                            fflush(stdout);
                             break;
                         }
                     }
                 }
-                if (is_interacting) {
+                if (n_past > 0 && is_interacting) {
                     // potentially set color to indicate we are taking user input
                     set_console_state(CONSOLE_STATE_USER_INPUT);
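Two details worth noting here: taking `antiprompt` by reference avoids copying each string on every iteration, and switching the console color plus flushing stdout inside the loop makes the color change visible before the blocking read for user input. The suffix test itself, `find()` with a start position of `last_output.length() - antiprompt.length()`, is equivalent to an explicit ends-with check; a sketch (`ends_with` is not in the codebase):

#include <string>

// Hypothetical equivalent of the reverse-prompt test above: true when
// `text` ends with `suffix`. Starting the search at the only position
// where a suffix could match makes find() a suffix-only test.
static bool ends_with(const std::string & text, const std::string & suffix) {
    return text.size() >= suffix.size() &&
           text.compare(text.size() - suffix.size(), suffix.size(), suffix) == 0;
}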
@@ -495,8 +498,11 @@ int main(int argc, char ** argv) {
                     input_noecho = true; // do not echo this again
                 }
 
-                is_interacting = false;
+                if (n_past > 0) {
+                    is_interacting = false;
+                }
             }
 
             // end of text token
             if (embd.back() == llama_token_eos()) {
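The two `n_past > 0` guards (here and in the previous hunk) work together: while nothing has been evaluated yet, the loop neither stops for user input nor clears `is_interacting`, which is what lets the prompt start processing before the first input arrives. A toy trace of that startup sequence (assumed flag initialization; not the real code):

#include <cstdio>

// Toy trace of the two n_past > 0 guards. n_past counts tokens already
// evaluated; until the first prompt batch has been processed (n_past == 0)
// the loop neither stops for input nor clears is_interacting.
int main() {
    int  n_past         = 0;
    bool is_interacting = true; // set before the loop, e.g. by instruct mode

    for (int step = 0; step < 3; ++step) {
        printf("step %d: n_past = %d\n", step, n_past);

        if (n_past > 0 && is_interacting) {
            printf("  -> would switch console color and read user input\n");
        }
        if (n_past > 0) {
            is_interacting = false; // flag only cleared once evaluation began
        }

        n_past += 2; // pretend we evaluated a batch of prompt tokens
    }
    return 0;
}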