cont : no need for special "greedy" logic

top-k == 1 produces the same result as the dedicated greedy sampler
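
For context, a minimal sketch of the equivalence the message refers to, written against the public llama.h sampler-chain API (the helper names, the seed value of 0, and the ctx/idx arguments are illustrative, not part of this change): truncating the candidate list to the single highest-logit token and then drawing from that one-element distribution always returns the same token as the dedicated greedy sampler.

// Sketch only, not part of this commit: greedy decoding expressed two ways.
// Assumes the llama.h sampler-chain API from the 2024 sampling refactor;
// ctx and idx are whatever the surrounding application already has.
#include "llama.h"

static llama_token greedy_explicit(llama_context * ctx, int32_t idx) {
    llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, llama_sampler_init_greedy());    // argmax over the logits
    const llama_token id = llama_sampler_sample(chain, ctx, idx);
    llama_sampler_free(chain);
    return id;
}

static llama_token greedy_via_top_k(llama_context * ctx, int32_t idx) {
    llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, llama_sampler_init_top_k(1));    // keep only the best candidate
    llama_sampler_chain_add(chain, llama_sampler_init_dist(0));     // one candidate left -> deterministic
    const llama_token id = llama_sampler_sample(chain, ctx, idx);
    llama_sampler_free(chain);
    return id;
}

// Both functions pick the same token for the same logits, which is why the
// special-cased negative-temperature/greedy path in the diff below can go away.
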
author Georgi Gerganov 2024-10-17 18:09:57 +03:00
parent cb75bebcad
commit 57fb835e5b


@@ -171,7 +171,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                 params.penalize_nl,
                 params.ignore_eos));
-    if (params.temp >= 0.0f) {
     if (params.mirostat == 0) {
         for (const auto & cnstr : params.samplers) {
             switch (cnstr) {
@@ -213,18 +212,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
     } else {
         GGML_ASSERT(false && "unknown mirostat version");
     }
-    } else {
-        // negative temperatures will trigger "greedy" sampling: simply take the most likely token each time
-        if (params.n_probs > 0) {
-            // some use cases require to sample greedily, but still obtain the probabilities of the top tokens
-            // ref: https://github.com/ggerganov/llama.cpp/pull/9605
-            //
-            // the following will not produce exactly the same probs as applying softmax to the full vocabulary, but
-            // it is much faster, since we avoid sorting all tokens and should give a good approximation
-            llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
-        }
-        llama_sampler_chain_add(result->chain, llama_sampler_init_greedy());
-    }
     return result;
 }
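
The comment in the removed block also documents why top_k(params.n_probs) was placed in front of the greedy sampler: applying softmax to only the top candidates approximates the full-vocabulary probabilities without sorting and normalizing over the whole vocab. A self-contained toy illustration of that trade-off, with made-up logits and helpers (this is not llama.cpp code):

// Toy illustration: top-token probabilities from a softmax over the full
// "vocabulary" vs. a softmax over only the top-k candidates.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

static std::vector<float> softmax(std::vector<float> logits) {
    const float max_l = *std::max_element(logits.begin(), logits.end());
    float sum = 0.0f;
    for (float & l : logits) { l = std::exp(l - max_l); sum += l; }
    for (float & l : logits) { l /= sum; }
    return logits;
}

int main() {
    // pretend vocabulary, already sorted by logit for simplicity
    const std::vector<float> logits = { 5.0f, 4.0f, 1.0f, 0.5f, 0.2f, 0.1f };
    const int k = 2;

    // exact: normalize over the full vocabulary
    const auto exact = softmax(logits);

    // approximate: keep only the k largest logits and renormalize
    const auto approx = softmax(std::vector<float>(logits.begin(), logits.begin() + k));

    for (int i = 0; i < k; ++i) {
        std::printf("token %d: exact %.4f  approx %.4f\n", i, exact[i], approx[i]);
    }
    // The approximation overestimates slightly because the discarded tail no
    // longer contributes to the normalizer, but it avoids softmaxing (and
    // sorting) the entire vocabulary.
    return 0;
}
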