From ffd00797d81ef7db1528b9e10adbdc333ade6495 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 8 Jul 2024 02:31:55 -0400 Subject: [PATCH] common : avoid unnecessary logits fetch (#8358) --- common/sampling.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 9f332fe57..e9db2715f 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl( GGML_ASSERT(!original_logits.empty()); } llama_token id = 0; - // Get a pointer to the logits - float * logits = llama_get_logits_ith(ctx_main, idx); if (temp < 0.0) { // greedy sampling, with probs @@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl( } if (ctx_sampling->grammar != NULL && !is_resampling) { + // Get a pointer to the logits + float * logits = llama_get_logits_ith(ctx_main, idx); + // Create an array with a single token data element for the sampled id llama_token_data single_token_data = {id, logits[id], 0.0f}; llama_token_data_array single_token_data_array = { &single_token_data, 1, false };