mirror of https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
llama : do a warm-up eval at start for better timings (#1824)
This commit is contained in:
parent 74d4cfa343
commit 2347e45e7b
@@ -331,6 +331,13 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd;
 
+    // do one empty run to warm up the model
+    {
+        const std::vector<llama_token> tmp = { llama_token_bos(), };
+        llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
+        llama_reset_timings(ctx);
+    }
+
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
         if (embd.size() > 0) {