Mirror of https://github.com/ggerganov/llama.cpp.git
If the first token generated by the server is the stop word, the server will crash (#7038)
The following request reproduces the issue with llama13b: { 'prompt': 'Q: hello world \nA: ', 'stop': ['\n'], 'temperature': 0.0, 'n_predict': 10, 'cache_prompt': True, 'n_probs': 10 }
parent: 92139b90af
commit: 03fb8a002d
@@ -1383,9 +1383,10 @@ struct server_context {
         if (!slot.params.stream && slot.stopped_word) {
             const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
 
+            size_t safe_offset = std::min(slot.generated_token_probs.size(), stop_word_toks.size());
             probs = std::vector<completion_token_output>(
                     slot.generated_token_probs.begin(),
-                    slot.generated_token_probs.end() - stop_word_toks.size());
+                    slot.generated_token_probs.end() - safe_offset);
         } else {
             probs = std::vector<completion_token_output>(
                     slot.generated_token_probs.begin(),
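The crash comes from the iterator arithmetic on the removed line: when slot.generated_token_probs holds fewer entries than the stop word tokenizes to, end() - stop_word_toks.size() points before begin(), and constructing a vector from that inverted range is undefined behavior. Below is a minimal, self-contained C++ sketch of the same pattern and of the clamped fix; completion_token_output here is a stand-in struct and the sizes are illustrative, since the exact server state that triggers the crash is not shown in the diff.

// Minimal sketch of the bug and the fix (stand-alone; the struct below is a
// stand-in for the server's completion_token_output).
#include <algorithm>
#include <cstdio>
#include <vector>

struct completion_token_output { int tok; float prob; };

int main() {
    // Illustrative state: generation stopped before any probability entries
    // were recorded, while the stop word tokenizes to one token.
    std::vector<completion_token_output> generated_token_probs;
    const size_t stop_word_toks_size = 1;

    // Old code: end() - stop_word_toks_size points before begin() here, so
    // building a vector from that range is undefined behavior (the crash).
    //
    // std::vector<completion_token_output> probs(
    //         generated_token_probs.begin(),
    //         generated_token_probs.end() - stop_word_toks_size);

    // Fixed code: clamp the offset to the number of recorded entries.
    const size_t safe_offset = std::min(generated_token_probs.size(), stop_word_toks_size);
    std::vector<completion_token_output> probs(
            generated_token_probs.begin(),
            generated_token_probs.end() - safe_offset);

    printf("kept %zu probability entries\n", probs.size());
    return 0;
}

Clamping with std::min keeps the original intent (drop the stop word's tokens from the tail of the probability list) while guaranteeing that the iterator range handed to the vector constructor is always valid.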