mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
Add LLAMA_DEFAULT_RMS_EPS so we can change the default (#2384)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
parent
07aaa0f63f
commit
eb542d3932
@ -8,7 +8,11 @@
|
|||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const float rms_norm_eps = 1e-6f;
|
#ifdef LLAMA_DEFAULT_RMS_EPS
|
||||||
|
static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
|
||||||
|
#else
|
||||||
|
static const float rms_norm_eps = 5e-6f;
|
||||||
|
#endif
|
||||||
|
|
||||||
float frand() {
|
float frand() {
|
||||||
return (float)rand()/(float)RAND_MAX;
|
return (float)rand()/(float)RAND_MAX;
|
||||||
|
@ -34,7 +34,7 @@ struct gpt_params {
|
|||||||
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
||||||
float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
|
float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
|
||||||
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
||||||
float rms_norm_eps = 1e-6; // rms norm epsilon
|
float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS; // rms norm epsilon
|
||||||
float rope_freq_base = 10000.0f; // RoPE base frequency
|
float rope_freq_base = 10000.0f; // RoPE base frequency
|
||||||
float rope_freq_scale = 1.0f; // RoPE frequency scaling factor
|
float rope_freq_scale = 1.0f; // RoPE frequency scaling factor
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const float rms_norm_eps = 1e-6f;
|
static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
|
||||||
|
|
||||||
struct random_normal_distribution {
|
struct random_normal_distribution {
|
||||||
std::mt19937 gen;
|
std::mt19937 gen;
|
||||||
|
@ -186,7 +186,7 @@ struct llama_hparams {
|
|||||||
// LLaMAv2
|
// LLaMAv2
|
||||||
// TODO: load from model data hparams
|
// TODO: load from model data hparams
|
||||||
float f_ffn_mult = 1.0f;
|
float f_ffn_mult = 1.0f;
|
||||||
float f_rms_norm_eps = 1e-6f;
|
float f_rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
|
||||||
|
|
||||||
float rope_freq_base = 10000.0f;
|
float rope_freq_base = 10000.0f;
|
||||||
float rope_freq_scale = 1.0f;
|
float rope_freq_scale = 1.0f;
|
||||||
@ -870,7 +870,7 @@ struct llama_context_params llama_context_default_params() {
|
|||||||
/*.n_ctx =*/ 512,
|
/*.n_ctx =*/ 512,
|
||||||
/*.n_batch =*/ 512,
|
/*.n_batch =*/ 512,
|
||||||
/*.n_gqa =*/ 1,
|
/*.n_gqa =*/ 1,
|
||||||
/*.rms_norm_eps =*/ 1e-6f,
|
/*.rms_norm_eps =*/ LLAMA_DEFAULT_RMS_EPS,
|
||||||
/*.gpu_layers =*/ 0,
|
/*.gpu_layers =*/ 0,
|
||||||
/*.main_gpu =*/ 0,
|
/*.main_gpu =*/ 0,
|
||||||
/*.tensor_split =*/ nullptr,
|
/*.tensor_split =*/ nullptr,
|
||||||
|
Loading…
Reference in New Issue
Block a user