rename llava projector to minicpmv projector

2025-01-12 19:50:17 +00:00 · 2024-05-23 20:00:45 +08:00 · 2024-05-23 20:00:45 +08:00 · c536fa6ef9
commit c536fa6ef9
parent 7a49a6f6dc
2 changed files with 12 additions and 12 deletions
--- a/examples/minicpmv/README.md
+++ b/examples/minicpmv/README.md
@@ -6,14 +6,14 @@ make
 make minicpmv-cli

 python ./examples/minicpmv/minicpmv-surgery.py -m ../MiniCPM-V-2_5
-python ./examples/minicpmv/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-2_5 --llava-projector ../MiniCPM-V-2_5/llava.projector --output-dir ../MiniCPM-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5
+python ./examples/minicpmv/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-2_5 --minicpmv-projector ../MiniCPM-V-2_5/minicpmv.projector --output-dir ../MiniCPM-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5
 python ./convert.py ../MiniCPM-V-2_5/model  --outtype f16 --vocab-type bpe
-./minicpmv-cli -m ../MiniCPM-V-2_5/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
+./minicpmv-cli -m ../MiniCPM-V-2_5/model/model-8B-F16.gguf --mmproj ../MiniCPM-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"

 # or run quantize int4 version
-./quantize ../MiniCPM-V-2_5/model/ggml-model-f16.gguf ../MiniCPM-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M
-./minicpmv-cli -m ../MiniCPM-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.6 --top-p 0.8 --top-k 100 --repeat-penalty 1.0 --image xx.jpg  -p "What is in the image?"
+./quantize ../MiniCPM-V-2_5/model/model-8B-F16.gguf ../MiniCPM-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M
+./minicpmv-cli -m ../MiniCPM-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?"

 # or run in interactive mode
-./minicpmv-cli -m ../MiniCPM-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.6 --top-p 0.8 --top-k 100 --repeat-penalty 1.0 --image xx.jpg -i
+./minicpmv-cli -m ../MiniCPM-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i
 ```
--- a/examples/minicpmv/clip.cpp
+++ b/examples/minicpmv/clip.cpp
@@ -75,7 +75,7 @@ static std::string format(const char * fmt, ...) {
 #define KEY_DESCRIPTION    "general.description"
 #define KEY_HAS_TEXT_ENC   "clip.has_text_encoder"
 #define KEY_HAS_VIS_ENC    "clip.has_vision_encoder"
-#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector"
+#define KEY_HAS_LLAVA_PROJ "clip.has_minicpmv_projector"
 #define KEY_USE_GELU       "clip.use_gelu"
 #define KEY_N_EMBD         "clip.%s.embedding_length"
 #define KEY_N_FF           "clip.%s.feed_forward_length"
@@ -526,7 +526,7 @@ struct clip_vision_model {
 struct clip_ctx {
    bool has_text_encoder    = false;
    bool has_vision_encoder  = false;
-    bool has_llava_projector = false;
+    bool has_minicpmv_projector = false;

    struct clip_vision_model vision_model;
    projector_type proj_type = PROJECTOR_TYPE_MLP;
@@ -606,7 +606,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32

    const int batch_size = imgs->size;

-    if (ctx->has_llava_projector) {
+    if (ctx->has_minicpmv_projector) {
        GGML_ASSERT(batch_size == 1);
    }

@@ -1124,10 +1124,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1, s

        idx = gguf_find_key(ctx, KEY_HAS_LLAVA_PROJ);
        if (idx != -1) {
-            new_clip->has_llava_projector = gguf_get_val_bool(ctx, idx);
+            new_clip->has_minicpmv_projector = gguf_get_val_bool(ctx, idx);
        }

-        GGML_ASSERT(new_clip->has_llava_projector); // see monatis/clip.cpp for image and/or text encoding for semantic search
+        GGML_ASSERT(new_clip->has_minicpmv_projector); // see monatis/clip.cpp for image and/or text encoding for semantic search
        GGML_ASSERT(new_clip->has_vision_encoder);
        GGML_ASSERT(!new_clip->has_text_encoder);

@@ -1137,7 +1137,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1, s
        if (verbosity >= 1) {
            LOG_TEE("%s: text_encoder:   %d\n", __func__, new_clip->has_text_encoder);
            LOG_TEE("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder);
-            LOG_TEE("%s: llava_projector:  %d\n", __func__, new_clip->has_llava_projector);
+            LOG_TEE("%s: llava_projector:  %d\n", __func__, new_clip->has_minicpmv_projector);
            LOG_TEE("%s: model size:     %.2f MB\n", __func__, model_size / 1024.0 / 1024.0);
            LOG_TEE("%s: metadata size:  %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0);
        }
@@ -1939,7 +1939,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
    }

    int batch_size = imgs->size;
-    if (ctx->has_llava_projector) {
+    if (ctx->has_minicpmv_projector) {
        GGML_ASSERT(batch_size == 1); // TODO: support multiple images
    }