From 0d4177126b0556e202efb85bf3f768be81076400 Mon Sep 17 00:00:00 2001
From: Elbios <141279586+Elbios@users.noreply.github.com>
Date: Thu, 15 Feb 2024 09:01:57 +0100
Subject: [PATCH] llava : fix memory management bug (#5491)

* Fix memory management in llava and server code

Fixes this error:

llama_new_context_with_model: graph splits (measure): 3
Available slots:
 -> Slot 0 - max context: 6000
{"timestamp":1707926446,"level":"INFO","function":"main","line":2623,"message":"model loaded"}
all slots are idle and system prompt is empty, clear the KV cache
slot 0 - loaded image
slot 0 is processing [task id: 0]
slot 0 : kv cache rm - [0, end)
slot 0 - encoding image [id: 1]
munmap_chunk(): invalid pointer
Aborted

* Make it cleaner by checking size in batch free wrapper
---
NOTE(review): this file had been flattened onto three physical lines, which
no diff tool can parse. The one-record-per-line layout is restored here.
Blank context lines and leading indentation were re-derived from the line
counts in the hunk headers and from the brace nesting of the code; runs of
spaces inside a line (e.g. column alignment) may have been collapsed.
Run `git apply --check` against the pre-image blobs named on the index
lines before relying on this patch. No added/removed line was altered.

 examples/llava/clip.cpp    | 24 +++++++++++++++++-------
 examples/llava/clip.h      |  2 ++
 examples/server/server.cpp | 11 +++++++++--
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 9c5091e61..2cad27e82 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1230,8 +1230,20 @@ struct clip_image_f32 * clip_image_f32_init() {
     return new clip_image_f32();
 }
 
-void clip_image_u8_free (struct clip_image_u8 * img) { delete img; }
+void clip_image_u8_free(struct clip_image_u8 * img) { delete img; }
 void clip_image_f32_free(struct clip_image_f32 * img) { delete img; }
+void clip_image_u8_batch_free(struct clip_image_u8_batch & batch) {
+    if (batch.size > 0) {
+        delete[] batch.data;
+        batch.size = 0;
+    }
+}
+void clip_image_f32_batch_free(struct clip_image_f32_batch & batch) {
+    if (batch.size > 0) {
+        delete[] batch.data;
+        batch.size = 0;
+    }
+}
 
 static void build_clip_img_from_data(const stbi_uc * data, int nx, int ny, clip_image_u8 * img) {
     img->nx = nx;
@@ -1494,11 +1506,8 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
         pad_to_square = false;
     }
     // free the previous res_imgs if any set
-    if (res_imgs.size > 0 && res_imgs.size < 100) {
-        for (size_t i = 0; i < res_imgs.size; i++) {
-            clip_image_f32_free(&(res_imgs.data[i]));
-        }
-        delete[] res_imgs.data;
+    if (res_imgs.size > 0) {
+        clip_image_f32_batch_free(res_imgs);
     }
     res_imgs.data = nullptr;
     res_imgs.size = 0;
@@ -1650,7 +1659,8 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
 
     res_imgs.size = 1;
     res_imgs.data = new clip_image_f32[res_imgs.size];
-    res_imgs.data[0] = std::move(*res);
+    res_imgs.data[0] = *res;
+    clip_image_f32_free(res);
 
     return true;
 }
diff --git a/examples/llava/clip.h b/examples/llava/clip.h
index cd9a4022f..e5bd54924 100644
--- a/examples/llava/clip.h
+++ b/examples/llava/clip.h
@@ -60,6 +60,8 @@ CLIP_API struct clip_image_f32 * clip_image_f32_init();
 
 CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
 CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
+CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch & batch);
+CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch & batch);
 
 CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 6e3434030..2decd7762 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -975,7 +975,12 @@ struct llama_server_context
             {
                 LOG_TEE("Error processing the given image");
                 clip_free(clp_ctx);
-                clip_image_f32_free(img_res_v.data);
+                clip_image_f32_batch_free(img_res_v);
+                return false;
+            }
+            if (img_res_v.size == 0)
+            {
+                LOG_TEE("Error processing the given image");
                 return false;
             }
 
@@ -987,6 +992,7 @@ struct llama_server_context
             if (!img.image_embedding)
             {
                 LOG_TEE("Unable to allocate memory for image embeddings\n");
+                clip_image_f32_batch_free(img_res_v);
                 clip_free(clp_ctx);
                 return false;
             }
@@ -994,10 +1000,11 @@ struct llama_server_context
             if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding))
             {
                 LOG_TEE("Unable to encode image\n");
+                clip_image_f32_batch_free(img_res_v);
                 return false;
             }
 
-            clip_image_f32_free(img_res_v.data);
+            clip_image_f32_batch_free(img_res_v);
 
             img.request_encode_image = false;
         }