Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2024-11-11 21:39:52 +00:00
server : fix incorrect num_tokens_predicted (#3480)
parent 8f3a642ec1
commit e8b8d32e86
@@ -504,9 +504,11 @@ struct llama_server_context
             });
         }
 
+        bool tg = true;
         while (n_past < embd.size())
         {
             int n_eval = (int)embd.size() - n_past;
+            tg = n_eval == 1;
             if (n_eval > params.n_batch)
             {
                 n_eval = params.n_batch;
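
The two added lines track whether the most recent decode before sampling handled exactly one token. Restating the hunk's loop with comments (identifiers as in the diff; the decode call itself is elided because the hunk does not show it):

    bool tg = true;                              // "token generation" flag
    while (n_past < embd.size())
    {
        int n_eval = (int)embd.size() - n_past;  // tokens still to decode
        tg = n_eval == 1;                        // one pending token => generation step
        if (n_eval > params.n_batch)
        {
            n_eval = params.n_batch;             // clamp to batch size; tg stays false
        }
        // ... decode n_eval tokens starting at embd[n_past] ...
        n_past += n_eval;
    }

After the loop, tg is true only when the final chunk was a single token, i.e. an ordinary generation step rather than prompt processing or context re-evaluation after a swap.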
@@ -633,8 +635,10 @@ struct llama_server_context
 
             last_n_tokens.erase(last_n_tokens.begin());
             last_n_tokens.push_back(result.tok);
-            num_tokens_predicted++;
+            if (tg) {
+                num_tokens_predicted++;
+            }
         }
 
         // add it to the context
         embd.push_back(result.tok);
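
The guard changes which sampling steps are counted. A hypothetical, self-contained toy model of the difference (not server code; the trace values are invented): each entry of pending plays the role of embd.size() - n_past at the start of one sampling step, here a 6-token prompt, five generated tokens, a simulated context swap re-feeding 6 tokens, and two more generated tokens.

    #include <cstdio>
    #include <vector>

    int main() {
        const int n_batch = 4;
        // pending tokens at each sampling step (hypothetical trace)
        std::vector<int> pending = {6, 1, 1, 1, 1, 1, 6, 1, 1};

        int counted_old = 0, counted_new = 0;
        for (int n_tokens : pending) {
            bool tg = true;
            int n_past = 0;
            while (n_past < n_tokens) {      // same chunking as the server loop
                int n_eval = n_tokens - n_past;
                tg = n_eval == 1;
                if (n_eval > n_batch) n_eval = n_batch;
                n_past += n_eval;            // "decode" the chunk
            }
            counted_old++;                   // before: every sampled token counted
            if (tg) counted_new++;           // after: only single-token decodes
        }
        printf("old = %d, new = %d\n", counted_old, counted_new);  // old = 9, new = 7
        return 0;
    }

Steps whose last decode was a multi-token batch (the prompt and the swap) no longer inflate num_tokens_predicted, which is what made the counter disagree with the timings that the next hunk touches.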
@@ -1124,8 +1128,6 @@ static json format_timings(llama_server_context &llama)
 {
     const auto timings = llama_get_timings(llama.ctx);
 
-    assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
-
     return json{
         {"prompt_n", timings.n_p_eval},
         {"prompt_ms", timings.t_p_eval_ms},
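
For the assert to have held, num_tokens_predicted had to count exactly the decodes that llama.cpp books under n_eval. The rule sketched below is my reading of the library's timing convention at the time (the struct is a hypothetical stand-in, not the real llama_timings): single-token decodes are booked as generation, larger batches as prompt work. The if (tg) guard above makes the server counter follow the same rule, and the assert, presumably a debugging aid for this fix, is dropped.

    // Hypothetical stand-in for the two relevant llama_timings counters.
    struct timing_counters { int n_eval = 0; int n_p_eval = 0; };

    // Assumed classification: a one-token decode counts toward n_eval
    // (generation); a multi-token batch counts toward n_p_eval (prompt).
    void account_decode(int n_tokens, timing_counters &t) {
        if (n_tokens == 1) t.n_eval += 1;
        else               t.n_p_eval += n_tokens;
    }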