mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
Fix prompt cache saving and chat-persistent rollover (#1678)
* Fix prompt cache saving and chat-persistent rollover (fixes #1670) * clang-tidy Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
ffb06a345e
commit
136476e898
@ -202,6 +202,13 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
// if we will use the cache for the full prompt without reaching the end of the cache, force
|
||||
// reevaluation of the last token to recalculate the cached logits
|
||||
if (!embd_inp.empty() && n_matching_session_tokens == embd_inp.size() &&
|
||||
session_tokens.size() > embd_inp.size()) {
|
||||
session_tokens.resize(embd_inp.size() - 1);
|
||||
}
|
||||
|
||||
// number of tokens to keep when resetting context
|
||||
if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct) {
|
||||
params.n_keep = (int)embd_inp.size();
|
||||
@ -360,12 +367,6 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
}
|
||||
if (i > 0) {
|
||||
// check if we've used up all the prompt but not all cached tokens
|
||||
if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
|
||||
// force reevaluation of the last token to recalculate logits
|
||||
i--;
|
||||
n_past--;
|
||||
}
|
||||
embd.erase(embd.begin(), embd.begin() + i);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user