llama : use n_embd_gqa instead of n_embd to handle llama-2 70B (#2433)
commit 65cdf34bdc
parent edcc7ae7d2
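For context on the commit title: n_embd_gqa() is the per-token width of the K/V tensors under grouped-query attention, which is smaller than n_embd whenever n_head_kv < n_head, as it is for LLaMA-2 70B. Below is a minimal sketch of that relationship, assuming the hparams accessors of this era of llama.cpp (n_gqa() = n_head / n_head_kv, n_embd_gqa() = n_embd / n_gqa()); the 70B numbers (n_embd = 8192, n_head = 64, n_head_kv = 8) are used purely for illustration.

```cpp
// Sketch (not the upstream code): how the GQA embedding width used for the
// KV cache relates to the regular hidden size.
#include <cstdint>
#include <cstdio>

struct hparams_sketch {
    uint32_t n_embd    = 8192; // hidden size (LLaMA-2 70B values, illustrative)
    uint32_t n_head    = 64;   // attention heads
    uint32_t n_head_kv = 8;    // KV heads under grouped-query attention

    uint32_t n_gqa()      const { return n_head / n_head_kv; } // 8
    uint32_t n_embd_gqa() const { return n_embd / n_gqa();   } // 1024
};

int main() {
    hparams_sketch hp;
    // The KV cache stores n_embd_gqa values per token per layer (for K and V),
    // so sizing it by n_embd would overstate the 70B state by the GQA factor.
    std::printf("n_embd = %u, n_embd_gqa = %u\n", hp.n_embd, hp.n_embd_gqa());
    return 0;
}
```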
@@ -26,6 +26,7 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();
 
     lparams.n_ctx = params.n_ctx;
+    lparams.n_gqa = params.n_gqa;
     lparams.seed = params.seed;
     lparams.f16_kv = params.memory_f16;
     lparams.use_mmap = params.use_mmap;
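The first hunk forwards the grouped-query-attention factor from the example's command-line params into the context params, so a 70B model is configured consistently with the library. A minimal standalone sketch, assuming the llama_context_params fields of this commit's era (n_ctx, n_gqa, f16_kv); the helper name is hypothetical:

```cpp
// Minimal sketch, assuming the llama.h API of this commit's era: n_gqa must be
// forwarded into llama_context_params, otherwise a LLaMA-2 70B model would be
// set up with the wrong grouped-query-attention factor.
#include "llama.h"

static struct llama_context_params make_params(int n_ctx, int n_gqa, bool memory_f16) {
    struct llama_context_params lparams = llama_context_default_params();
    lparams.n_ctx  = n_ctx;      // context length
    lparams.n_gqa  = n_gqa;      // GQA factor (8 for LLaMA-2 70B)
    lparams.f16_kv = memory_f16; // store the KV cache in f16
    return lparams;
}
```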
@@ -3663,7 +3663,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
     const auto & kv_self = ctx->kv_self;
     const auto & hparams = ctx->model.hparams;
     const int n_layer = hparams.n_layer;
-    const int n_embd = hparams.n_embd;
+    const int n_embd = hparams.n_embd_gqa();
     const int n_ctx = hparams.n_ctx;
 
     const size_t kv_size = kv_self.buf.size;
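The remaining two hunks size the serialized KV cache by n_embd_gqa instead of n_embd. A rough back-of-the-envelope sketch of why this matters (my own arithmetic, not upstream code): with grouped-query attention the K/V rows are n_embd_gqa elements wide, so using n_embd for a 70B model would overstate the KV state by the GQA factor of 8. The numbers below are illustrative only and assume an f16 KV cache.

```cpp
// Rough sketch of the KV-state volume that llama_copy_state_data has to cover,
// assuming an f16 KV cache; constants are illustrative LLaMA-2 70B values.
#include <cstddef>
#include <cstdio>

int main() {
    const std::size_t n_layer    = 80;   // transformer layers
    const std::size_t n_ctx      = 2048; // context length
    const std::size_t n_embd_gqa = 1024; // n_embd / (n_head / n_head_kv)
    const std::size_t elt_size   = 2;    // bytes per f16 element

    // K and V each hold n_ctx * n_embd_gqa elements per layer.
    const std::size_t kv_bytes = 2 * n_layer * n_ctx * n_embd_gqa * elt_size;
    std::printf("~%.1f MiB of KV state\n", kv_bytes / (1024.0 * 1024.0));
    return 0;
}
```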
@@ -3766,7 +3766,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
     const auto & kv_self = ctx->kv_self;
     const auto & hparams = ctx->model.hparams;
     const int n_layer = hparams.n_layer;
-    const int n_embd = hparams.n_embd;
+    const int n_embd = hparams.n_embd_gqa();
     const int n_ctx = hparams.n_ctx;
 
     size_t kv_size;
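For completeness, a usage sketch of the state API these two functions implement, assuming llama_get_state_size exists alongside them in llama.h as it did in this era: serialize a context's state (including the KV cache sized by n_embd_gqa) into a buffer, then restore it into a context created with the same parameters. The helper names are hypothetical.

```cpp
// Usage sketch, assuming the llama.h state API of this commit's era.
#include "llama.h"
#include <cstdint>
#include <vector>

static std::vector<uint8_t> save_state(llama_context * ctx) {
    // llama_get_state_size gives an upper bound; copy then trim to what was written.
    std::vector<uint8_t> buf(llama_get_state_size(ctx));
    const size_t written = llama_copy_state_data(ctx, buf.data());
    buf.resize(written);
    return buf;
}

static void restore_state(llama_context * ctx, std::vector<uint8_t> & buf) {
    // Restores logits, embeddings, and the KV cache into a compatible context.
    llama_set_state_data(ctx, buf.data());
}
```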