common : avoid unnecessary logits fetch (#8358)

This commit is contained in:
Kevin Wang 2024-07-08 02:31:55 -04:00 committed by GitHub
parent 04ce3a8b19
commit ffd00797d8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
GGML_ASSERT(!original_logits.empty()); GGML_ASSERT(!original_logits.empty());
} }
llama_token id = 0; llama_token id = 0;
// Get a pointer to the logits
float * logits = llama_get_logits_ith(ctx_main, idx);
if (temp < 0.0) { if (temp < 0.0) {
// greedy sampling, with probs // greedy sampling, with probs
@@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
} }
if (ctx_sampling->grammar != NULL && !is_resampling) { if (ctx_sampling->grammar != NULL && !is_resampling) {
// Get a pointer to the logits
float * logits = llama_get_logits_ith(ctx_main, idx);
// Create an array with a single token data element for the sampled id // Create an array with a single token data element for the sampled id
llama_token_data single_token_data = {id, logits[id], 0.0f}; llama_token_data single_token_data = {id, logits[id], 0.0f};
llama_token_data_array single_token_data_array = { &single_token_data, 1, false }; llama_token_data_array single_token_data_array = { &single_token_data, 1, false };