Mirror of https://github.com/ggerganov/llama.cpp.git
llama : have n_batch default to 512 (#1091)
* set default n_batch to 512 when using BLAS
* spacing
* alternate implementation of setting different n_batch for BLAS
* set n_batch to 512 for all cases
Parent: 7e312f165c
Commit: 10f19c1121
```diff
@@ -20,7 +20,7 @@ struct gpt_params {
     int32_t repeat_last_n = 64;   // last n tokens to penalize
     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx         = 512;  // context size
-    int32_t n_batch       = 8;    // batch size for prompt processing
+    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep        = 0;    // number of tokens to keep from initial prompt

     // sampling parameters
```
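The new default interacts with ggml's BLAS fast path: a matrix product is only dispatched to BLAS when all of its dimensions are large enough, and the number of prompt tokens processed per step (n_batch) forms one of those dimensions, hence the ">=32" note in the comment. Below is a minimal sketch of that gating logic, assuming a uniform threshold of 32 on every dimension; `mul_mat_can_use_blas` and `kMinBlasDim` are illustrative names, not the actual ggml identifiers.

```cpp
#include <cstdint>

// Sketch of the dimension check that decides whether a matrix
// multiplication is worth dispatching to BLAS. Hypothetical names;
// the real check lives inside ggml's mul_mat implementation.
static bool mul_mat_can_use_blas(int64_t rows, int64_t cols, int64_t batch) {
    const int64_t kMinBlasDim = 32; // below this, BLAS call overhead dominates
    return rows >= kMinBlasDim && cols >= kMinBlasDim && batch >= kMinBlasDim;
}

int main() {
    // With the old default (n_batch = 8) prompt chunks were too small to
    // qualify; the new default (n_batch = 512) clears the threshold.
    bool old_default = mul_mat_can_use_blas(4096, 4096, 8);   // false
    bool new_default = mul_mat_can_use_blas(4096, 4096, 512); // true
    return (old_default == false && new_default == true) ? 0 : 1;
}
```

With the old default of 8, prompt processing never cleared the threshold even on large models; with 512 it does, and the value can still be overridden at run time via the examples' `-b`/`--batch-size` flag.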