diff --git a/examples/minicpmv/clip.cpp b/examples/minicpmv/clip.cpp index cc7a137e0..d7ec7e5b1 100644 --- a/examples/minicpmv/clip.cpp +++ b/examples/minicpmv/clip.cpp @@ -3,6 +3,7 @@ // I'll gradually clean and extend it // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch #include "clip.h" +#include "common.h" #include "log.h" #include "ggml.h" #include "ggml-alloc.h" diff --git a/examples/minicpmv/minicpmv.cpp b/examples/minicpmv/minicpmv.cpp index 916da9e5e..323c7a7d2 100644 --- a/examples/minicpmv/minicpmv.cpp +++ b/examples/minicpmv/minicpmv.cpp @@ -466,7 +466,7 @@ static bool bicubic_resize(const clip_image_u8 &img, clip_image_u8 &dst, int tar return true; } -std::vector> slice_image(const clip_image_u8 * img, const int max_slice_nums=9, const int scale_resolution=448, const int patch_size=14, const bool never_split=false) { +std::vector> slice_image(const clip_image_u8 * img, const int max_slice_nums, const int scale_resolution, const int patch_size, const bool never_split) { const std::pair original_size={img->nx,img->ny}; const int original_width = img->nx; const int original_height = img->ny; diff --git a/examples/minicpmv/minicpmv.h b/examples/minicpmv/minicpmv.h index 3a1a9b6e6..fe6c5f0cc 100644 --- a/examples/minicpmv/minicpmv.h +++ b/examples/minicpmv/minicpmv.h @@ -34,6 +34,7 @@ MINICPMV_API bool llava_validate_embed_size(const struct llama_context * ctx_lla MINICPMV_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out); /** build an image embed from image file bytes */ +MINICPMV_API std::vector> slice_image(const clip_image_u8 * img, const int max_slice_nums=9, const int scale_resolution=448, const int patch_size=14, const bool never_split=false); MINICPMV_API std::vector> llava_image_embed_make_with_bytes_slice(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length); /** build an image embed from a path to an image filename */ MINICPMV_API std::vector> llava_image_embed_make_with_filename_slice(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);