mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
common : avoid unnecessary logits fetch (#8358)
This commit is contained in:
parent
04ce3a8b19
commit
ffd00797d8
@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
|
||||
GGML_ASSERT(!original_logits.empty());
|
||||
}
|
||||
llama_token id = 0;
|
||||
// Get a pointer to the logits
|
||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||
|
||||
if (temp < 0.0) {
|
||||
// greedy sampling, with probs
|
||||
@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
|
||||
}
|
||||
|
||||
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
||||
// Get a pointer to the logits
|
||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||
|
||||
// Create an array with a single token data element for the sampled id
|
||||
llama_token_data single_token_data = {id, logits[id], 0.0f};
|
||||
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
|
||||
|
Loading…
Reference in New Issue
Block a user