metal : release buffers when freeing metal context (#2062)

commit: 2f8cd979ec
parent: 471aab6e4c
repo:   https://github.com/ggerganov/llama.cpp.git (mirror)
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
     fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
     free(ctx);
 }
 
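Taken on its own, the new loop is the standard teardown pattern: walk the per-buffer bookkeeping, release each backend object, then free the context struct itself. A minimal self-contained C++ analogue of that pattern (the Context/Buffer names and the malloc-backed "buffers" are illustrative stand-ins, not llama.cpp code):

// Illustrative analogue of the ggml_metal_free() change above: release each
// per-buffer resource before freeing the context itself.
#include <cstdio>
#include <cstdlib>

struct Buffer  { void * data; };
struct Context { int n_buffers; Buffer buffers[8]; };

static void context_free(Context * ctx) {
    std::fprintf(stderr, "%s: deallocating\n", __func__);
    for (int i = 0; i < ctx->n_buffers; ++i) {
        std::free(ctx->buffers[i].data); // analogue of [ctx->buffers[i].metal release]
    }
    std::free(ctx);                      // only after the owned buffers are gone
}

int main() {
    Context * ctx = static_cast<Context *>(std::calloc(1, sizeof(Context)));
    ctx->n_buffers = 2;
    for (int i = 0; i < ctx->n_buffers; ++i) {
        ctx->buffers[i].data = std::malloc(16);
    }
    context_free(ctx);
    return 0;
}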
--- a/llama.cpp
+++ b/llama.cpp
@@ -253,7 +253,13 @@ struct llama_model {
 
 struct llama_context {
     llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
-
+#ifdef GGML_USE_METAL
+    ~llama_context() {
+        if (ctx_metal) {
+            ggml_metal_free(ctx_metal);
+        }
+    }
+#endif
     std::mt19937 rng;
 
     bool has_evaluated_once = false;
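The llama.cpp hunk is plain RAII: the context owns the Metal backend state, so its destructor releases it, covering every path that destroys a llama_context. A short self-contained sketch of the same idea, using hypothetical MetalState/metal_init/metal_free stand-ins rather than the real ggml Metal API:

// Sketch of the RAII pattern the new ~llama_context() adds: the owning object
// frees its backend context when it is destroyed. Names here are hypothetical.
#include <cstdio>

struct MetalState { int n_buffers = 0; };

static MetalState * metal_init() { return new MetalState(); }
static void metal_free(MetalState * st) {
    std::fprintf(stderr, "%s: deallocating\n", __func__);
    delete st;
}

struct Context {
    MetalState * ctx_metal = nullptr;

    ~Context() {
        if (ctx_metal) {          // mirrors the guard in ~llama_context()
            metal_free(ctx_metal);
        }
    }
};

int main() {
    Context ctx;
    ctx.ctx_metal = metal_init();
    return 0;                     // ~Context() runs here and releases ctx_metal
}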