mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-14 06:49:54 +00:00
main : fix session loading bug (#3400)
This commit is contained in:
parent
a8bdd65525
commit
b8fe4b5cc9
@ -297,6 +297,9 @@ int main(int argc, char ** argv) {
|
|||||||
LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n",
|
LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n",
|
||||||
__func__, n_matching_session_tokens, embd_inp.size());
|
__func__, n_matching_session_tokens, embd_inp.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remove any "future" tokens that we might have inherited from the previous session
|
||||||
|
llama_kv_cache_tokens_rm(ctx, n_matching_session_tokens, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGLN(
|
LOGLN(
|
||||||
@ -545,9 +548,6 @@ int main(int argc, char ** argv) {
|
|||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
embd.erase(embd.begin(), embd.begin() + i);
|
embd.erase(embd.begin(), embd.begin() + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove any "future" tokens that we might have inherited from the session from the KV cache
|
|
||||||
llama_kv_cache_tokens_rm(ctx, n_past, -1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// evaluate tokens in batches
|
// evaluate tokens in batches
|
||||||
|
Loading…
Reference in New Issue
Block a user