mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
server : add dynatemp_range
and dynatemp_exponent
(#5352)
* server: added `dynatemp_range` and `dynatemp_exponent` * Update README.md --------- Co-authored-by: Michael Coppola <info@michaeljcoppola.com>
This commit is contained in:
parent
4ffc7a17d4
commit
31e7903221
@ -137,6 +137,10 @@ node index.js
|
|||||||
|
|
||||||
`temperature`: Adjust the randomness of the generated text (default: 0.8).
|
`temperature`: Adjust the randomness of the generated text (default: 0.8).
|
||||||
|
|
||||||
|
`dynatemp_range`: Dynamic temperature range (default: 0.0, 0.0 = disabled).
|
||||||
|
|
||||||
|
`dynatemp_exponent`: Dynamic temperature exponent (default: 1.0).
|
||||||
|
|
||||||
`top_k`: Limit the next token selection to the K most probable tokens (default: 40).
|
`top_k`: Limit the next token selection to the K most probable tokens (default: 40).
|
||||||
|
|
||||||
`top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
|
`top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
|
||||||
|
@ -524,27 +524,29 @@ struct llama_server_context
|
|||||||
slot->oaicompat_model = "";
|
slot->oaicompat_model = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
slot->params.stream = json_value(data, "stream", false);
|
slot->params.stream = json_value(data, "stream", false);
|
||||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||||
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
||||||
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
||||||
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||||
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
||||||
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||||
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
||||||
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||||
slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||||
slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
|
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
||||||
slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
|
slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
||||||
slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present);
|
slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
|
||||||
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
|
||||||
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present);
|
||||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
||||||
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
||||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
slot->params.seed = json_value(data, "seed", default_params.seed);
|
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->params.seed = json_value(data, "seed", default_params.seed);
|
||||||
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
|
|
||||||
// infill
|
// infill
|
||||||
if (data.count("input_prefix") != 0)
|
if (data.count("input_prefix") != 0)
|
||||||
@ -1002,6 +1004,8 @@ struct llama_server_context
|
|||||||
{"model", params.model_alias},
|
{"model", params.model_alias},
|
||||||
{"seed", slot.params.seed},
|
{"seed", slot.params.seed},
|
||||||
{"temperature", slot.sparams.temp},
|
{"temperature", slot.sparams.temp},
|
||||||
|
{"dynatemp_range", slot.sparams.dynatemp_range},
|
||||||
|
{"dynatemp_exponent", slot.sparams.dynatemp_exponent},
|
||||||
{"top_k", slot.sparams.top_k},
|
{"top_k", slot.sparams.top_k},
|
||||||
{"top_p", slot.sparams.top_p},
|
{"top_p", slot.sparams.top_p},
|
||||||
{"min_p", slot.sparams.min_p},
|
{"min_p", slot.sparams.min_p},
|
||||||
|
Loading…
Reference in New Issue
Block a user