Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-25 02:44:36 +00:00)
llama : fix kv_cache n init (close #1903)

parent 86c7571864
commit 051e1b0e6a
.gitignore (vendored): 1 line added
@@ -34,6 +34,7 @@ models/*
 /perplexity
 /embedding
 /train-text-from-scratch
+/simple
 /benchmark-matmult
 /vdot
 /server
examples/CMakeLists.txt: 1 line added

@@ -38,6 +38,7 @@ else()
     add_subdirectory(benchmark)
     add_subdirectory(baby-llama)
     add_subdirectory(train-text-from-scratch)
+    add_subdirectory(simple)
     if (LLAMA_METAL)
         add_subdirectory(metal)
     endif()
llama.cpp: 2 lines added

@@ -886,6 +886,7 @@ static bool kv_cache_init(
     const int64_t n_elements = n_embd*n_mem;

     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    cache.n = 0;

     struct ggml_init_params params;
     params.mem_size = cache.buf.size;
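For context, cache.n tracks how many tokens currently occupy the KV cache; before this change, kv_cache_init sized the buffer but never reset n, so it could start from an indeterminate value. A minimal sketch below (hypothetical kv_cache_sketch type and helper, not the real llama_kv_cache) shows the same init pattern:

    #include <cstdio>
    #include <vector>

    // Sketch of why the commit adds `cache.n = 0;`: `n` counts the tokens
    // currently stored in the cache, and nothing zeroes it for us.
    struct kv_cache_sketch {
        std::vector<float> buf; // backing memory for the K and V tensors
        int n;                  // tokens in the cache; indeterminate until set
    };

    // Mirrors the shape of kv_cache_init: size the buffer, then reset `n`
    // so the first evaluation sees an empty cache.
    static bool kv_cache_init_sketch(kv_cache_sketch & cache, size_t n_ctx, size_t n_embd) {
        cache.buf.resize(2u * n_ctx * n_embd); // room for both K and V
        cache.n = 0; // the fix: without this, `n` starts with garbage
        return true;
    }

    int main() {
        kv_cache_sketch cache;
        kv_cache_init_sketch(cache, 512, 4096);
        printf("tokens in cache after init: %d\n", cache.n); // prints 0
        return 0;
    }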
@@ -904,6 +905,7 @@ static bool kv_cache_init(
     ggml_set_name(cache.k, "cache_k");
     ggml_set_name(cache.v, "cache_v");

+    (void) n_gpu_layers;
 #ifdef GGML_USE_CUBLAS
     if (n_gpu_layers > n_layer + 1) {
         ggml_cuda_assign_buffers_no_scratch(cache.v);
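The (void) cast is the usual idiom for marking a parameter as deliberately unused: in builds without GGML_USE_CUBLAS, the #ifdef block that reads n_gpu_layers is compiled out, and the cast suppresses the unused-parameter warning. A standalone sketch (hypothetical init_sketch function, not llama.cpp's API):

    #include <cstdio>

    // When the only reads of a parameter sit behind an #ifdef, builds
    // without that flag would warn about an unused parameter; the (void)
    // cast counts as a use in every configuration.
    static bool init_sketch(int n_gpu_layers) {
        (void) n_gpu_layers; // deliberately "used" in all builds
    #ifdef GGML_USE_CUBLAS
        if (n_gpu_layers > 0) {
            printf("would offload %d layers to the GPU\n", n_gpu_layers);
        }
    #endif
        return true;
    }

    int main() {
        init_sketch(32);
        return 0;
    }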