common : avoid unnecessary logits fetch (#8358)

2024-12-25 02:44:36 +00:00 · 2024-07-08 02:31:55 -04:00 · 2024-07-08 02:31:55 -04:00 · ffd00797d8
commit ffd00797d8
parent 04ce3a8b19
1 changed file with 3 additions and 2 deletions
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
        GGML_ASSERT(!original_logits.empty());
    }
    llama_token id = 0;
    // Get a pointer to the logits
    float * logits = llama_get_logits_ith(ctx_main, idx);
    if (temp < 0.0) {
        // greedy sampling, with probs
@@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
    }
    if (ctx_sampling->grammar != NULL && !is_resampling) {
        // Get a pointer to the logits
        float * logits = llama_get_logits_ith(ctx_main, idx);
        // Create an array with a single token data element for the sampled id
        llama_token_data single_token_data = {id, logits[id], 0.0f};
        llama_token_data_array single_token_data_array = { &single_token_data, 1, false };