mirror of https://github.com/ggerganov/llama.cpp.git
llama : suggest reduce ctx size when kv init fails
commit 20e12112fd
parent bf60f27cda
@@ -798,7 +798,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     void * data = ggml_aligned_malloc(size);
 
     if (data == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
         return NULL;
     }
 
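The only functional change in this hunk is the log format: the raw byte count ("%zu") is replaced by a mebibyte figure, which is easier to read for multi-gigabyte buffers. A minimal standalone sketch of the same conversion (the helper name below is hypothetical, not from the repo):

#include <stdio.h>
#include <stddef.h>

/* Hypothetical helper mirroring the patched format string: a size in
 * bytes is printed as MiB with two decimal places. */
static void log_alloc_failure(size_t size) {
    fprintf(stderr, "failed to allocate buffer of size %.2f MiB\n",
            size / 1024.0 / 1024.0);
}

int main(void) {
    log_alloc_failure((size_t) 3221225472u); /* 3 GiB -> "3072.00 MiB" */
    return 0;
}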
@@ -19520,6 +19520,7 @@ struct llama_context * llama_new_context_with_model(
 
         if (!llama_kv_cache_init(ctx->kv_self, ctx, type_k, type_v, kv_size, cparams.offload_kqv)) {
             LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
+            LLAMA_LOG_ERROR("%s: suggestion: try using a smaller context size (-c command line option or llama_context_params.n_ctx)\n", __func__);
             llama_free(ctx);
             return nullptr;
         }
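The added log line tells the user what to do when the self-attention KV cache cannot be allocated: lower the context size. A sketch of how a caller could act on that suggestion programmatically, assuming the llama.h C API of this period (llama_context_default_params, llama_new_context_with_model); the fallback loop itself is illustrative and not part of this commit:

#include "llama.h"
#include <stdint.h>
#include <stddef.h>

/* Illustrative fallback: retry context creation with a halved n_ctx each
 * time llama_new_context_with_model fails (for example, because
 * llama_kv_cache_init could not allocate the KV cache). */
static struct llama_context * new_ctx_with_fallback(struct llama_model * model) {
    struct llama_context_params cparams = llama_context_default_params();
    for (uint32_t n_ctx = 8192; n_ctx >= 1024; n_ctx /= 2) {
        cparams.n_ctx = n_ctx; /* the parameter the new error message points at */
        struct llama_context * ctx = llama_new_context_with_model(model, cparams);
        if (ctx != NULL) {
            return ctx;
        }
        /* failure was already logged by the library, including the suggestion */
    }
    return NULL;
}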