mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
common : avoid unnecessary logits fetch (#8358)
This commit is contained in:
parent
04ce3a8b19
commit
ffd00797d8
@@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
|
|||||||
GGML_ASSERT(!original_logits.empty());
|
GGML_ASSERT(!original_logits.empty());
|
||||||
}
|
}
|
||||||
llama_token id = 0;
|
llama_token id = 0;
|
||||||
// Get a pointer to the logits
|
|
||||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
|
||||||
|
|
||||||
if (temp < 0.0) {
|
if (temp < 0.0) {
|
||||||
// greedy sampling, with probs
|
// greedy sampling, with probs
|
||||||
@@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
||||||
|
// Get a pointer to the logits
|
||||||
|
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||||
|
|
||||||
// Create an array with a single token data element for the sampled id
|
// Create an array with a single token data element for the sampled id
|
||||||
llama_token_data single_token_data = {id, logits[id], 0.0f};
|
llama_token_data single_token_data = {id, logits[id], 0.0f};
|
||||||
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
|
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
|
||||||
|
Loading…
Reference in New Issue
Block a user