mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-10 10:41:47 +00:00
cont : no need for special "greedy" logic
top-k == 1 is the same
This commit is contained in:
parent
cb75bebcad
commit
57fb835e5b
@ -171,7 +171,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|||||||
params.penalize_nl,
|
params.penalize_nl,
|
||||||
params.ignore_eos));
|
params.ignore_eos));
|
||||||
|
|
||||||
if (params.temp >= 0.0f) {
|
|
||||||
if (params.mirostat == 0) {
|
if (params.mirostat == 0) {
|
||||||
for (const auto & cnstr : params.samplers) {
|
for (const auto & cnstr : params.samplers) {
|
||||||
switch (cnstr) {
|
switch (cnstr) {
|
||||||
@ -213,18 +212,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|||||||
} else {
|
} else {
|
||||||
GGML_ASSERT(false && "unknown mirostat version");
|
GGML_ASSERT(false && "unknown mirostat version");
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// negative temperatures will trigger "greedy" sampling: simply take the most likely token each time
|
|
||||||
if (params.n_probs > 0) {
|
|
||||||
// some use cases require to sample greedily, but still obtain the probabilities of the top tokens
|
|
||||||
// ref: https://github.com/ggerganov/llama.cpp/pull/9605
|
|
||||||
//
|
|
||||||
// the following will not produce exactly the same probs as applying softmax to the full vocabulary, but
|
|
||||||
// it is much faster, since we avoid sorting all tokens and should give a good approximation
|
|
||||||
llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
|
|
||||||
}
|
|
||||||
llama_sampler_chain_add(result->chain, llama_sampler_init_greedy());
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user