From 6443ddd98576a9da904ef9f07df4e4398bb6a01a Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Wed, 18 Sep 2024 13:42:36 +0200
Subject: [PATCH] llama : use reserve/emplace_back in sampler_sample (#9534)

This commit updates the llama_sampler_sample function to use reserve and
emplace_back for the vector of llama_token_data structs.

The motivation for this change is to avoid the creation of n_vocab
default-constructed llama_token_data structs which are then immediately
overwritten.
---
 src/llama-sampling.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 5275b1d60..5299f5116 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -236,9 +236,10 @@ llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_conte
     const int n_vocab = llama_n_vocab(llama_get_model(ctx));
 
     // TODO: do not allocate each time
-    std::vector<llama_token_data> cur(n_vocab);
+    std::vector<llama_token_data> cur;
+    cur.reserve(n_vocab);
     for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-        cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
+        cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
     }
 
     llama_token_data_array cur_p = {