mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-11 13:30:35 +00:00
llama : use reserve/emplace_back in sampler_sample (#9534)
This commit updates the llama_sampler_sample function to use reserve and emplace_back when building the vector of llama_token_data structs. The motivation for this change is to avoid constructing the vector with n_vocab default-constructed llama_token_data structs, which are then immediately overwritten.
This commit is contained in:
parent
8a308354f6
commit
6443ddd985
@ -236,9 +236,10 @@ llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_conte
|
|||||||
const int n_vocab = llama_n_vocab(llama_get_model(ctx));
|
const int n_vocab = llama_n_vocab(llama_get_model(ctx));
|
||||||
|
|
||||||
// TODO: do not allocate each time
|
// TODO: do not allocate each time
|
||||||
std::vector<llama_token_data> cur(n_vocab);
|
std::vector<llama_token_data> cur;
|
||||||
|
cur.reserve(n_vocab);
|
||||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||||
cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
|
cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_token_data_array cur_p = {
|
llama_token_data_array cur_p = {
|
||||||
|
Loading…
Reference in New Issue
Block a user