llama.cpp/examples/llava/minicpmv-surgery.py

import argparse
import os
import torch
from transformers import AutoModel, AutoTokenizer

# Split a MiniCPM-V-2.5 checkpoint directory into the pieces that are
# converted separately:
#   <model>/minicpmv.projector  - tensors whose names start with "resampler"
#   <model>/minicpmv.clip       - tensors whose names start with "vpm"
#   <model>/model/              - the language model (model.llm) and tokenizer
ap = argparse.ArgumentParser()
# required=True: every step below needs the path, so fail early with a usage
# message instead of passing None to from_pretrained()
ap.add_argument("-m", "--model", required=True, help="Path to MiniCPM-V-2.5 model")
args = ap.parse_args()

# find the model part that includes the multimodal projector weights
# (local_files_only=True: the checkpoint is read from args.model only)
model = AutoModel.from_pretrained(args.model, trust_remote_code=True, local_files_only=True)
checkpoint = model.state_dict()

# get a list of mm tensor names
mm_tensors = [k for k in checkpoint if k.startswith("resampler")]

# store these tensors in a new dictionary (cast with .float()) and torch.save them
projector = {name: checkpoint[name].float() for name in mm_tensors}
torch.save(projector, f"{args.model}/minicpmv.projector")

# same treatment for the "vpm" tensors; "vpm." is stripped from their names
clip_tensors = [k for k in checkpoint if k.startswith("vpm")]
if clip_tensors:
    clip = {name.replace("vpm.", ""): checkpoint[name].float() for name in clip_tensors}
    torch.save(clip, f"{args.model}/minicpmv.clip")

    # added tokens should be removed to be able to convert Mistral models
    # NOTE: this only runs when "vpm" tensors were found, and it overwrites
    # an existing added_tokens.json in place with an empty JSON object.
    added_tokens_path = f"{args.model}/added_tokens.json"
    if os.path.exists(added_tokens_path):
        with open(added_tokens_path, "w") as f:
            f.write("{}\n")

# Adjust the language model's config before saving it. `config` is the object
# returned by model.llm.config, so these edits are presumably what
# save_pretrained() below writes out - verify against transformers if changed.
config = model.llm.config
config._name_or_path = "openbmb/MiniCPM-Llama3-V-2.5"
config.auto_map = {
    "AutoConfig": "configuration_minicpm.MiniCPMConfig",
    "AutoModel": "modeling_minicpm.MiniCPMModel",
    "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification",
}

# save the language model and the tokenizer side by side in <model>/model
llm_dir = f"{args.model}/model"
model.llm.save_pretrained(llm_dir)
tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
tok.save_pretrained(llm_dir)

print("Done!")
print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
print(f"Also, use {args.model}/minicpmv.projector to prepare a minicpmv-encoder.gguf file.")
llava : support MiniCPM-V-2.5 (#7599) * init * rename * add run android for termux in readme * add android readme * add instructions in readme * change name in readme * Update README.md * fixed line * add result in readme * random pos_embed * add positions index * change for ollama * change for ollama * better pos_embed in clip * support ollama * updata cmakelist * updata cmakelist * rename wrapper * clear code * replace and organize code * add link * sync master * fix warnings * fix warnings * fix bug in bicubic resize when need resize iamge smaller * receive review comments and modify * receive review comments and modify * put all code into llava dir * fix quality problem in pr code * change n_layer * add space in "-1" * imitate reshape bug of python code * fix bug in clip * fix issues for merging * fix llama-minicpmv-cli in cmake file * change pr readme * fix code review * remove in line 33 directory in the /cmakelists.txt (not in example, in the main dir * fix cmakefile * add warn * fix KEY_HAS_MINICPMV_PROJ * remove load_image_size into clip_ctx * remove the extern "C", MINICPMV_API * fix uhd code for review comment * delete minicpmv-wrapper in pr * remove uhd_image_embed * Modify 2 notes * clip : style changes * del common.h in clip * fix Type-Check error * fix Type-Check error * fix Type-Check error * fix Type-Check error * fix makefile error * fix ubuntu-make error * try fix clip * try fix 1 --------- Co-authored-by: Hongji Zhu <fireyoucan@gmail.com> Co-authored-by: harvestingmoon <leewenyeong@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-08-09 10:33:53 +00:00			`import argparse`
			`import os`
			`import torch`
			`from transformers import AutoModel, AutoTokenizer`

			`ap = argparse.ArgumentParser()`
			`ap.add_argument("-m", "--model", help="Path to MiniCPM-V-2.5 model")`
			`args = ap.parse_args()`

			`# find the model part that includes the multimodal projector weights`
			`model = AutoModel.from_pretrained(args.model, trust_remote_code=True, local_files_only=True)`
			`checkpoint = model.state_dict()`

			`# get a list of mm tensor names`
			`mm_tensors = [k for k, v in checkpoint.items() if k.startswith("resampler")]`

			`# store these tensors in a new dictionary and torch.save them`
			`projector = {name: checkpoint[name].float() for name in mm_tensors}`
			`torch.save(projector, f"{args.model}/minicpmv.projector")`

			`clip_tensors = [k for k, v in checkpoint.items() if k.startswith("vpm")]`
			`if len(clip_tensors) > 0:`
			`clip = {name.replace("vpm.", ""): checkpoint[name].float() for name in clip_tensors}`
			`torch.save(clip, f"{args.model}/minicpmv.clip")`

			`# added tokens should be removed to be able to convert Mistral models`
			`if os.path.exists(f"{args.model}/added_tokens.json"):`
			`with open(f"{args.model}/added_tokens.json", "w") as f:`
			`f.write("{}\n")`

			`config = model.llm.config`
			`config._name_or_path = "openbmb/MiniCPM-Llama3-V-2.5"`
			`config.auto_map = {`
			`"AutoConfig": "configuration_minicpm.MiniCPMConfig",`
			`"AutoModel": "modeling_minicpm.MiniCPMModel",`
			`"AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",`
			`"AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",`
			`"AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"`
			`}`
			`model.llm.save_pretrained(f"{args.model}/model")`
			`tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)`
			`tok.save_pretrained(f"{args.model}/model")`
			`# os.system(f"cp {args.model}/modeling_minicpm.py {args.model}/MiniCPM_l3/modeling_minicpm.py")`

			`print("Done!")`
			`print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")`
			`print(f"Also, use {args.model}/minicpmv.projector to prepare a minicpmv-encoder.gguf file.")`