server : fix incorrect num_tokens_predicted (#3480)

Jhen-Jie Hong 2023-10-05 09:02:55 -05:00 committed by GitHub
parent 8f3a642ec1
commit e8b8d32e86

@@ -504,9 +504,11 @@ struct llama_server_context
             });
         }

+        bool tg = true;
         while (n_past < embd.size())
         {
             int n_eval = (int)embd.size() - n_past;
+            tg = n_eval == 1;
             if (n_eval > params.n_batch)
             {
                 n_eval = params.n_batch;
@@ -633,8 +635,10 @@ struct llama_server_context

             last_n_tokens.erase(last_n_tokens.begin());
             last_n_tokens.push_back(result.tok);
-            num_tokens_predicted++;
+            if (tg) {
+                num_tokens_predicted++;
+            }
         }

         // add it to the context
         embd.push_back(result.tok);
@@ -1124,8 +1128,6 @@ static json format_timings(llama_server_context &llama)
 {
     const auto timings = llama_get_timings(llama.ctx);

-    assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
-
     return json{
         {"prompt_n", timings.n_p_eval},
         {"prompt_ms", timings.t_p_eval_ms},