llama : suggest reduce ctx size when kv init fails

slaren 2024-11-02 00:55:19 +01:00
parent bf60f27cda
commit 20e12112fd
2 changed files with 2 additions and 1 deletion


@@ -798,7 +798,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     void * data = ggml_aligned_malloc(size);
     if (data == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
         return NULL;
     }
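
For reference, a minimal standalone C sketch of the formatting change above; the helper name alloc_or_log and the use of plain malloc in place of ggml_aligned_malloc are illustrative, not from the commit. It shows the effect of the new message: the failure log reports the request in MiB instead of a raw byte count.

#include <stdio.h>
#include <stdlib.h>

// Hypothetical stand-in for ggml_aligned_malloc + GGML_LOG_ERROR.
static void * alloc_or_log(size_t size) {
    void * data = malloc(size);
    if (data == NULL) {
        // Same conversion as the new log line: bytes to MiB, two decimals.
        fprintf(stderr, "%s: failed to allocate buffer of size %.2f MiB\n",
                __func__, size / 1024.0 / 1024.0);
        return NULL;
    }
    return data;
}

int main(void) {
    // A 512 MiB request: on failure this logs "512.00 MiB" rather than 536870912.
    void * buf = alloc_or_log((size_t)512 * 1024 * 1024);
    free(buf);
    return 0;
}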


@@ -19520,6 +19520,7 @@ struct llama_context * llama_new_context_with_model(
         if (!llama_kv_cache_init(ctx->kv_self, ctx, type_k, type_v, kv_size, cparams.offload_kqv)) {
             LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
+            LLAMA_LOG_ERROR("%s: suggestion: try using a smaller context size (-c command line option or llama_context_params.n_ctx)\n", __func__);
             llama_free(ctx);
             return nullptr;
         }
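
On the caller side, the new suggestion can also be acted on programmatically: llama_new_context_with_model returns nullptr when KV cache allocation fails, so a caller can retry with a smaller n_ctx. A hedged C sketch against the llama.h API of this era; the helper name and the halving fallback policy are illustrative, not part of the commit.

#include <stdint.h>
#include "llama.h"

// Hypothetical helper: retry context creation with progressively smaller n_ctx.
static struct llama_context * create_ctx_with_fallback(struct llama_model * model, uint32_t n_ctx) {
    struct llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx = n_ctx;

    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    while (ctx == NULL && cparams.n_ctx > 512) {
        // KV cache init failed; halve the context size and try again,
        // which is what the new log message suggests.
        cparams.n_ctx /= 2;
        ctx = llama_new_context_with_model(model, cparams);
    }
    return ctx; // may still be NULL if even the smallest size fails
}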