mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 20:04:35 +00:00
common : change defaults [no ci]
This commit is contained in:
parent
7f9cc2058c
commit
4eb126fff0
@@ -157,8 +157,8 @@ struct common_params_sampling {
|
||||
|
||||
struct common_params_speculative {
|
||||
int32_t n_ctx = 4096; // draft context size
|
||||
-int32_t n_max = 5; // maximum number of tokens to draft during speculative decoding
|
||||
-int32_t n_min = 0; // minimum number of draft tokens to use for speculative decoding
|
||||
+int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
|
||||
+int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding
|
||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
||||
float p_split = 0.1f; // speculative decoding split probability
|
||||
float p_min = 0.9f; // minimum speculative decoding probability (greedy)
|
||||
|
Loading…
Reference in New Issue
Block a user