mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
* Workaround for recalculating logits in cached prompts
This commit is contained in:
parent
0e730dd23b
commit
248367605e
@ -360,6 +360,12 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
|
// check if we've used up all the prompt but not all cached tokens
|
||||||
|
if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
|
||||||
|
// force re-evaluation of the last token to recalculate logits
|
||||||
|
i--;
|
||||||
|
n_past--;
|
||||||
|
}
|
||||||
embd.erase(embd.begin(), embd.begin() + i);
|
embd.erase(embd.begin(), embd.begin() + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user