llama : do not print "offloading layers" message in CPU-only builds (#5416)

commit 41f308f58e (parent 6e99f2a04f)
Author: slaren
Date:   2024-02-08 21:33:03 +01:00 (committed by GitHub)


@@ -4209,8 +4209,7 @@ static bool llm_load_tensors(
         ctx_bufs.emplace_back(ctx, buf);
     }
 
-    // print memory requirements
-    {
+    if (llama_supports_gpu_offload()) {
         const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
 
         LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);
@@ -4222,11 +4221,12 @@ static bool llm_load_tensors(
         const int max_offloadable_layers = hparams.n_layer + 1;
 
         LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
+    }
 
-        for (ggml_backend_buffer_t buf : model.bufs) {
-            LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
-        }
-    }
+    // print memory requirements
+    for (ggml_backend_buffer_t buf : model.bufs) {
+        LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
+    }
 
     // populate tensors_by_name
     for (ggml_context * ctx : model.ctxs) {
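
Note: llama_supports_gpu_offload() is part of the public llama.h API and reports whether the binary was compiled with a GPU backend, so the guard above makes the "offloading ... layers to GPU" log lines a no-op in CPU-only builds, while the buffer-size report still prints unconditionally. A minimal sketch of how such a capability check can be implemented (illustrative only, not the exact llama.cpp source; the GGML_USE_* macros shown are the usual ggml backend build flags):

// Illustrative sketch - the real definition lives in llama.cpp.
// Returns true only when some GPU backend was compiled in.
bool llama_supports_gpu_offload(void) {
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || \
    defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)    || defined(GGML_USE_KOMPUTE)
    return true;   // a GPU backend is available: offload messages are meaningful
#else
    return false;  // CPU-only build: the "offloading layers" messages are skipped
#endif
}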