metal : release buffers when freeing metal context (#2062)

commit: 2f8cd979ec
parent: 471aab6e4c
repo:   https://github.com/ggerganov/llama.cpp.git (mirror)
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
     fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
     free(ctx);
 }
 
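Taken on its own, the new loop is the standard teardown pattern: walk the per-buffer bookkeeping, release each backend object, then free the context struct itself. A minimal self-contained C++ analogue of that pattern (the Context/Buffer names and the malloc-backed "buffers" are illustrative stand-ins, not llama.cpp code):

// Illustrative analogue of the ggml_metal_free() change above: release each
// per-buffer resource before freeing the context itself.
#include <cstdio>
#include <cstdlib>

struct Buffer  { void * data; };
struct Context { int n_buffers; Buffer buffers[8]; };

static void context_free(Context * ctx) {
    std::fprintf(stderr, "%s: deallocating\n", __func__);
    for (int i = 0; i < ctx->n_buffers; ++i) {
        std::free(ctx->buffers[i].data); // analogue of [ctx->buffers[i].metal release]
    }
    std::free(ctx);                      // only after the owned buffers are gone
}

int main() {
    Context * ctx = static_cast<Context *>(std::calloc(1, sizeof(Context)));
    ctx->n_buffers = 2;
    for (int i = 0; i < ctx->n_buffers; ++i) {
        ctx->buffers[i].data = std::malloc(16);
    }
    context_free(ctx);
    return 0;
}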
--- a/llama.cpp
+++ b/llama.cpp
@@ -253,7 +253,13 @@ struct llama_model {
 
 struct llama_context {
     llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
-
+#ifdef GGML_USE_METAL
+    ~llama_context() {
+        if (ctx_metal) {
+            ggml_metal_free(ctx_metal);
+        }
+    }
+#endif
     std::mt19937 rng;
 
     bool has_evaluated_once = false;
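The llama.cpp hunk is plain RAII: the context owns the Metal backend state, so its destructor releases it, covering every path that destroys a llama_context. A short self-contained sketch of the same idea, using hypothetical MetalState/metal_init/metal_free stand-ins rather than the real ggml Metal API:

// Sketch of the RAII pattern the new ~llama_context() adds: the owning object
// frees its backend context when it is destroyed. Names here are hypothetical.
#include <cstdio>

struct MetalState { int n_buffers = 0; };

static MetalState * metal_init() { return new MetalState(); }
static void metal_free(MetalState * st) {
    std::fprintf(stderr, "%s: deallocating\n", __func__);
    delete st;
}

struct Context {
    MetalState * ctx_metal = nullptr;

    ~Context() {
        if (ctx_metal) {          // mirrors the guard in ~llama_context()
            metal_free(ctx_metal);
        }
    }
};

int main() {
    Context ctx;
    ctx.ctx_metal = metal_init();
    return 0;                     // ~Context() runs here and releases ctx_metal
}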