llava : MobileVLM support (#4954)
* MobileVLM native implementation
* delete depthwise_conv_2d and permute_cpy relative code, replace the two by the existed functions, and opt ldp definition, support LLAMA_PERF option for CMake
* move android script to example/llava directory
* Fix the editor config checks

Co-authored-by: Chenxiaotao03 <chenxiaotao03@meituan.com>
parent b2d80e105a
commit 3ce7e8f8e7
@ -108,6 +108,13 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STA
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ON)

# add perf arguments
option(LLAMA_PERF "llama: enable perf" OFF)
if (LLAMA_PERF)
    add_definitions(-DGGML_PERF)
endif()

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
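Note: building with `-DLLAMA_PERF=ON` defines `GGML_PERF` for the whole build; in this version of ggml that macro enables the per-operator timing counters ggml collects while computing a graph (an assumption based on how `GGML_PERF` is used in `ggml.c` of this era, not something this commit changes).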
131 examples/llava/MobileVLM-README.md Normal file
@ -0,0 +1,131 @@
# MobileVLM

Currently this implementation supports [MobileVLM-1.7B](https://huggingface.co/mtgv/MobileVLM-1.7B) variants.

For more information, please go to [Meituan-AutoML/MobileVLM](https://github.com/Meituan-AutoML/MobileVLM).

The implementation is based on llava and is compatible with both llava and MobileVLM. The usage is basically the same as llava.

## Usage

Build with cmake or run `make llava-cli` to build it.

After building, run `./llava-cli` to see the usage. For example:

```sh
./llava-cli -m MobileVLM-1.7B/ggml-model-q4_k.gguf \
    --mmproj MobileVLM-1.7B/mmproj-model-f16.gguf \
    --image path/to/an/image.jpg \
    -p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWho is the author of this book? Answer the question using a single word or phrase. ASSISTANT:"
```
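The prompt follows the Vicuna-style conversation template used by llava-1.5; the `<image>` marker is where `llava-cli` splices the encoded image embeddings into the token stream.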
## Model conversion

1. Clone `MobileVLM-1.7B` and `clip-vit-large-patch14-336` locally:

```sh
git clone https://huggingface.co/mtgv/MobileVLM-1.7B

git clone https://huggingface.co/openai/clip-vit-large-patch14-336
```

2. Use `llava-surgery.py` to split the LLaVA model into its LLaMA and multimodal projector constituents:

```sh
python ./examples/llava/llava-surgery.py -m path/to/MobileVLM-1.7B
```

3. Use `convert-image-encoder-to-gguf.py` with `--projector-type ldp` to convert the LLaVA image encoder to GGUF:

```sh
python ./examples/llava/convert-image-encoder-to-gguf.py \
    -m path/to/clip-vit-large-patch14-336 \
    --llava-projector path/to/MobileVLM-1.7B/llava.projector \
    --output-dir path/to/MobileVLM-1.7B \
    --projector-type ldp
```

4. Use `convert.py` to convert the LLaMA part of LLaVA to GGUF:

```sh
python ./convert.py path/to/MobileVLM-1.7B
```

5. Use `quantize` to convert the LLaMA part's data type from `fp16` to `q4_k`:

```sh
./quantize path/to/MobileVLM-1.7B/ggml-model-f16.gguf path/to/MobileVLM-1.7B/ggml-model-q4_k.gguf q4_k_s
```

Now both the LLaMA part and the image encoder are in the `MobileVLM-1.7B` directory.
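After these steps the directory contains the two files the usage example above refers to: `mmproj-model-f16.gguf` (the image encoder plus LDP projector) and `ggml-model-q4_k.gguf` (the quantized LLaMA part).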
## Android compile and run

### compile

Refer to `examples/llava/android/build_64.sh`:

```sh
mkdir examples/llava/android/build_64
cd examples/llava/android/build_64
../build_64.sh
```
### run on Android

Refer to `android/adb_run.sh`; modify the resources' `name` and `path` variables before running.

## Some results on Android with a `Snapdragon 888` chip

### case 1

**input**

```sh
/data/local/tmp/llava-cli \
    -m /data/local/tmp/ggml-model-q4_k.gguf \
    --mmproj /data/local/tmp/mmproj-model-f16.gguf \
    -t 4 \
    --image /data/local/tmp/demo.jpg \
    -p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWho is the author of this book? \nAnswer the question using a single word or phrase. ASSISTANT:"
```

**output**

```sh
encode_image_with_clip: image encoded in 21148.71 ms by CLIP ( 146.87 ms per image patch)
 Susan Wise Bauer
llama_print_timings:        load time = 23574.72 ms
llama_print_timings:      sample time =     1.24 ms /     6 runs   (   0.21 ms per token,  4850.44 tokens per second)
llama_print_timings: prompt eval time = 12460.15 ms /   246 tokens (  50.65 ms per token,    19.74 tokens per second)
llama_print_timings:        eval time =   424.86 ms /     6 runs   (  70.81 ms per token,    14.12 tokens per second)
llama_print_timings:       total time = 34731.93 ms
```
### case 2

**input**

```sh
/data/local/tmp/llava-cli \
    -m /data/local/tmp/ggml-model-q4_k.gguf \
    --mmproj /data/local/tmp/mmproj-model-f16.gguf \
    -t 4 \
    --image /data/local/tmp/cat.jpeg \
    -p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat is in the image? ASSISTANT:"
```

**output**

```sh
encode_image_with_clip: image encoded in 21149.51 ms by CLIP ( 146.87 ms per image patch)
 The image depicts a cat sitting in the grass near some tall green plants.
llama_print_timings:        load time = 23257.32 ms
llama_print_timings:      sample time =     5.25 ms /    18 runs   (   0.29 ms per token,  3430.53 tokens per second)
llama_print_timings: prompt eval time = 11900.73 ms /   232 tokens (  51.30 ms per token,    19.49 tokens per second)
llama_print_timings:        eval time =  1279.03 ms /    18 runs   (  71.06 ms per token,    14.07 tokens per second)
llama_print_timings:       total time = 34570.79 ms
```
## Minor shortcomings

The `n_patch` of the `ldp` output is 1/4 that of its input. As a quick implementation, we uniformly modified the `clip_n_patches` function to return a quarter of the patch count, so when timing is reported per patch, the calculated per-patch time is 4 times larger than the real cost.
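Concretely: a 336x336 input with patch size 14 gives (336/14)^2 = 24x24 = 576 CLIP patches, and the LDP projector's stride-2 depthwise stage halves each spatial dimension to 12x12 = 144 output positions, i.e. 576/4 — which is exactly what the modified `clip_n_patches` returns.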
## TODO

- [ ] Support non-CPU backend for the new operators, such as `depthwise`, `hardswish`, `hardsigmoid`
- [ ] Optimize LDP projector performance
    - Optimize the structure definition to avoid unnecessary memory rearrangements and reduce the use of `ggml_permute_cpy`
    - Optimize operator implementations (ARM CPU/NVIDIA GPU), such as depthwise conv, hardswish, hardsigmoid, etc.
- [ ] Run MobileVLM on `Jetson Orin`
- [ ] Support more model variants, such as `MobileVLM-3B`

## Contributors

```sh
zhangjidong05, yangyang260, huyiming03, chenxiaotao03
```
53 examples/llava/android/adb_run.sh Executable file
@ -0,0 +1,53 @@
#!/bin/bash

model_dir="/Users/cxt/model/llm/mobileVLM/MobileVLM-1.7B_processed"
projector_name="mmproj-model-f16.gguf"
llama_name="ggml-model-q4_k.gguf"
modelName="${llama_name}" # base name used for the on-device log file
img_dir="/Users/cxt/model/llm"
img_name="demo.jpg"
prompt="A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWho is the author of this book? \nAnswer the question using a single word or phrase. ASSISTANT:"
# img_name="cat.jpeg"
# prompt="A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat is in the image? ASSISTANT:"

program_dir="build_64/bin"
binName="llava-cli"
n_threads=4


deviceDir="/data/local/tmp"
saveDir="output"
if [ ! -d ${saveDir} ]; then
    mkdir ${saveDir}
fi


function android_run() {
    # # copy resource into device
    # adb push ${model_dir}/${projector_name} ${deviceDir}/${projector_name}
    # adb push ${model_dir}/${llama_name} ${deviceDir}/${llama_name}
    adb push ${img_dir}/${img_name} ${deviceDir}/${img_name}
    # copy program into device
    adb push ${program_dir}/${binName} ${deviceDir}/${binName}
    adb shell "chmod 0777 ${deviceDir}/${binName}"

    # run
    adb shell "echo cd ${deviceDir} ${deviceDir}/${binName} \
        -m ${deviceDir}/${llama_name} \
        --mmproj ${deviceDir}/${projector_name} \
        -t ${n_threads} \
        --image ${deviceDir}/${img_name} \
        -p \"${prompt}\" \
        > ${deviceDir}/${modelName}_${projector_name}_${n_threads}_${img_name}.txt"
    adb shell "cd ${deviceDir}; pwd; ${deviceDir}/${binName} \
        -m ${deviceDir}/${llama_name} \
        --mmproj ${deviceDir}/${projector_name} \
        -t ${n_threads} \
        --image ${deviceDir}/${img_name} \
        -p \"${prompt}\" \
        >> ${deviceDir}/${modelName}_${projector_name}_${n_threads}_${img_name}.txt 2>&1"
    adb pull ${deviceDir}/${modelName}_${projector_name}_${n_threads}_${img_name}.txt ${saveDir}
}


android_run

echo "android_run is Done!"
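Note: the script is meant to be run from `examples/llava/android` once `build_64.sh` has produced `build_64/bin/llava-cli` (the `program_dir` path is relative); on the first run, uncomment the two `adb push` lines so the GGUF files are copied to the device as well.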
8 examples/llava/android/build_64.sh Executable file
@ -0,0 +1,8 @@
#!/bin/bash
cmake ../../../../ \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_PLATFORM=android-23 $1

make -j4
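Note: the script assumes the `ANDROID_NDK` environment variable points at an installed Android NDK (it is only used to locate the CMake toolchain file), and forwards one optional extra CMake argument via `$1`.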
examples/llava/clip.cpp
@ -12,6 +12,7 @@
#include <regex>
#include <stdexcept>
#include <vector>
#include <sstream>

#include "clip.h"
#include "ggml.h"
@ -67,6 +68,7 @@ static std::string format(const char * fmt, ...) {
#define KEY_PATCH_SIZE "clip.vision.patch_size"
#define KEY_IMAGE_MEAN "clip.vision.image_mean"
#define KEY_IMAGE_STD "clip.vision.image_std"
#define KEY_PROJ_TYPE "clip.projector_type"

//
// tensor name constants
@ -89,6 +91,21 @@ static std::string format(const char * fmt, ...) {
#define TN_TEXT_PROJ "text_projection.weight"
#define TN_VIS_PROJ "visual_projection.weight"
#define TN_LLAVA_PROJ "mm.%d.%s"
#define TN_MVLM_PROJ_MLP "mm.model.mlp.%d.%s"
#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"


enum projector_type {
    PROJECTOR_TYPE_MLP,
    PROJECTOR_TYPE_LDP,
    PROJECTOR_TYPE_UNKNOWN,
};

static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
    { PROJECTOR_TYPE_MLP, "mlp" },
    { PROJECTOR_TYPE_LDP, "ldp" },
};

//
// utilities to get data from a gguf file
@ -129,6 +146,91 @@ static std::string get_ftype(int ftype) {
    return ggml_type_name(static_cast<ggml_type>(ftype));
}

static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
    switch (type) {
        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
        default:                return format("unknown type %d", type);
    }
}


static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    std::string result;
    for (size_t pos = 0; ; pos += search.length()) {
        auto new_pos = s.find(search, pos);
        if (new_pos == std::string::npos) {
            result += s.substr(pos, s.size() - pos);
            break;
        }
        result += s.substr(pos, new_pos - pos) + replace;
        pos = new_pos;
    }
    s = std::move(result);
}

static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);

    switch (type) {
        case GGUF_TYPE_STRING:
            return gguf_get_val_str(ctx_gguf, i);
        case GGUF_TYPE_ARRAY:
            {
                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
                int arr_n = gguf_get_arr_n(ctx_gguf, i);
                const void * data = gguf_get_arr_data(ctx_gguf, i);
                std::stringstream ss;
                ss << "[";
                for (int j = 0; j < arr_n; j++) {
                    if (arr_type == GGUF_TYPE_STRING) {
                        std::string val = gguf_get_arr_str(ctx_gguf, i, j);
                        // escape quotes
                        replace_all(val, "\\", "\\\\");
                        replace_all(val, "\"", "\\\"");
                        ss << '"' << val << '"';
                    } else if (arr_type == GGUF_TYPE_ARRAY) {
                        ss << "???";
                    } else {
                        ss << gguf_data_to_str(arr_type, data, j);
                    }
                    if (j < arr_n - 1) {
                        ss << ", ";
                    }
                }
                ss << "]";
                return ss.str();
            }
        default:
            return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
    }
}

static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
    size_t tensor_size = ggml_nbytes(tensor);
    printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%d, %d, %d, %d], type: %d\n",
           prefix, ggml_n_dims(tensor), tensor->name, tensor_size,
           tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], tensor->type);
}

static projector_type clip_projector_type_from_string(const std::string & name) {
    for (const auto & kv : PROJECTOR_TYPE_NAMES) { // NOLINT
        if (kv.second == name) {
            return kv.first;
        }
    }
    return PROJECTOR_TYPE_UNKNOWN;
}

//
// image data
//
@ -205,6 +307,32 @@ struct clip_vision_model {
    struct ggml_tensor * mm_0_b;
    struct ggml_tensor * mm_2_w;
    struct ggml_tensor * mm_2_b;

    // MobileVLM projection
    struct ggml_tensor * mm_model_mlp_1_w;
    struct ggml_tensor * mm_model_mlp_1_b;
    struct ggml_tensor * mm_model_mlp_3_w;
    struct ggml_tensor * mm_model_mlp_3_b;
    struct ggml_tensor * mm_model_block_1_block_0_0_w;
    struct ggml_tensor * mm_model_block_1_block_0_1_w;
    struct ggml_tensor * mm_model_block_1_block_0_1_b;
    struct ggml_tensor * mm_model_block_1_block_1_fc1_w;
    struct ggml_tensor * mm_model_block_1_block_1_fc1_b;
    struct ggml_tensor * mm_model_block_1_block_1_fc2_w;
    struct ggml_tensor * mm_model_block_1_block_1_fc2_b;
    struct ggml_tensor * mm_model_block_1_block_2_0_w;
    struct ggml_tensor * mm_model_block_1_block_2_1_w;
    struct ggml_tensor * mm_model_block_1_block_2_1_b;
    struct ggml_tensor * mm_model_block_2_block_0_0_w;
    struct ggml_tensor * mm_model_block_2_block_0_1_w;
    struct ggml_tensor * mm_model_block_2_block_0_1_b;
    struct ggml_tensor * mm_model_block_2_block_1_fc1_w;
    struct ggml_tensor * mm_model_block_2_block_1_fc1_b;
    struct ggml_tensor * mm_model_block_2_block_1_fc2_w;
    struct ggml_tensor * mm_model_block_2_block_1_fc2_b;
    struct ggml_tensor * mm_model_block_2_block_2_0_w;
    struct ggml_tensor * mm_model_block_2_block_2_1_w;
    struct ggml_tensor * mm_model_block_2_block_2_1_b;
};

@ -213,6 +341,7 @@ struct clip_ctx {
    bool has_llava_projector = false;

    struct clip_vision_model vision_model;
    projector_type proj_type = PROJECTOR_TYPE_MLP;

    float image_mean[3];
    float image_std[3];
@ -430,16 +559,135 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
        free(patches_data);
    }

    // shape [1, 576, 1024]
    // ne is whcn, ne = [1024, 576, 1, 1]
    embeddings = ggml_get_rows(ctx0, embeddings, patches);

    // print_tensor_info(embeddings, "embeddings");

    // llava projector
    if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
        embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
        embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);

        embeddings = ggml_gelu(ctx0, embeddings);

        embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
        embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
    }
    else if (ctx->proj_type == PROJECTOR_TYPE_LDP) {
        // MobileVLM projector
        int n_patch = 24;
        struct ggml_tensor * mlp_1 = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w, embeddings);
        mlp_1 = ggml_add(ctx0, mlp_1, model.mm_model_mlp_1_b);
        mlp_1 = ggml_gelu(ctx0, mlp_1);
        struct ggml_tensor * mlp_3 = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, mlp_1);
        mlp_3 = ggml_add(ctx0, mlp_3, model.mm_model_mlp_3_b);
        // mlp_3 shape = [1, 576, 2048], ne = [2048, 576, 1, 1]

        // block 1
        struct ggml_tensor * block_1 = nullptr;
        {
            // transpose from [1, 576, 2048] --> [1, 2048, 576] --> [1, 2048, 24, 24]
            mlp_3 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_3, 1, 0, 2, 3));
            mlp_3 = ggml_reshape_4d(ctx0, mlp_3, n_patch, n_patch, mlp_3->ne[1], mlp_3->ne[2]);
            // stride = 1, padding = 1, bias is nullptr
            block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, nullptr, 1, 1, 1, 1, 1, 1);

            // layer norm
            // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 2, 0, 3));
            // block_1 shape = [1, 24, 24, 2048], ne = [2048, 24, 24, 1]
            block_1 = ggml_norm(ctx0, block_1, eps);
            block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_1_block_0_1_w), model.mm_model_block_1_block_0_1_b);
            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));

            // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
            // hardswish
            struct ggml_tensor * block_1_hw = ggml_hardswish(ctx0, block_1);

            block_1 = ggml_pool_2d(ctx0, block_1_hw, GGML_OP_POOL_AVG, block_1_hw->ne[0], block_1_hw->ne[1], block_1_hw->ne[0], block_1_hw->ne[1], 0, 0);
            // block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
            // pointwise conv
            block_1 = ggml_reshape_2d(ctx0, block_1, block_1->ne[0]*block_1->ne[1]*block_1->ne[2], block_1->ne[3]);
            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_1_block_1_fc1_w, block_1);
            block_1 = ggml_add(ctx0, block_1, model.mm_model_block_1_block_1_fc1_b);
            block_1 = ggml_relu(ctx0, block_1);
            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_1_block_1_fc2_w, block_1);
            block_1 = ggml_add(ctx0, block_1, model.mm_model_block_1_block_1_fc2_b);
            block_1 = ggml_hardsigmoid(ctx0, block_1);
            // block_1_hw shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1], block_1 shape = [1, 2048], ne = [2048, 1, 1, 1]
            block_1 = ggml_reshape_4d(ctx0, block_1, 1, 1, block_1->ne[0], block_1->ne[1]);
            block_1 = ggml_mul(ctx0, block_1_hw, block_1);

            int w = block_1->ne[0], h = block_1->ne[1];
            block_1 = ggml_reshape_3d(ctx0, block_1, w*h, block_1->ne[2], block_1->ne[3]);
            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));

            // block_1 shape = [1, 24*24, 2048], ne = [24*24, 2048, 1]
            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_1_block_2_0_w, block_1);
            block_1 = ggml_reshape_4d(ctx0, block_1, block_1->ne[0], w, h, block_1->ne[3]);

            // block_1 shape = [1, 24, 24, 2048], ne = [2048, 24, 24, 1]
            block_1 = ggml_norm(ctx0, block_1, eps);
            block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_1_block_2_1_w), model.mm_model_block_1_block_2_1_b);
            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));
            // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
            // residual
            block_1 = ggml_add(ctx0, mlp_3, block_1);
        }

        // block_2
        {
            // stride = 2
            block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_2_block_0_0_w, block_1, nullptr, 2, 2, 1, 1, 1, 1);

            // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
            // layer norm
            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 2, 0, 3));
            // block_1 shape = [1, 12, 12, 2048], ne = [2048, 12, 12, 1]
            block_1 = ggml_norm(ctx0, block_1, eps);
            block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_2_block_0_1_w), model.mm_model_block_2_block_0_1_b);
            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));
            // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
            // hardswish
            struct ggml_tensor * block_1_hw = ggml_hardswish(ctx0, block_1);

            // not sure the parameters is right for globalAvgPooling
            block_1 = ggml_pool_2d(ctx0, block_1_hw, GGML_OP_POOL_AVG, block_1_hw->ne[0], block_1_hw->ne[1], block_1_hw->ne[0], block_1_hw->ne[1], 0, 0);
            // block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
            // pointwise conv
            block_1 = ggml_reshape_2d(ctx0, block_1, block_1->ne[0]*block_1->ne[1]*block_1->ne[2], block_1->ne[3]);
            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_1_fc1_w, block_1);
            block_1 = ggml_add(ctx0, block_1, model.mm_model_block_2_block_1_fc1_b);
            block_1 = ggml_relu(ctx0, block_1);
            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_1_fc2_w, block_1);
            block_1 = ggml_add(ctx0, block_1, model.mm_model_block_2_block_1_fc2_b);
            block_1 = ggml_hardsigmoid(ctx0, block_1);

            // block_1_hw shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1], block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
            block_1 = ggml_reshape_4d(ctx0, block_1, 1, 1, block_1->ne[0], block_1->ne[1]);
            block_1 = ggml_mul(ctx0, block_1_hw, block_1);

            int w = block_1->ne[0], h = block_1->ne[1];
            block_1 = ggml_reshape_3d(ctx0, block_1, w*h, block_1->ne[2], block_1->ne[3]);
            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));
            // block_1 shape = [1, 24*24, 2048], ne = [24*24, 2048, 1]
            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_2_0_w, block_1);
            block_1 = ggml_reshape_4d(ctx0, block_1, block_1->ne[0], w, h, block_1->ne[3]);


            // block_1 shape = [1, 12, 12, 2048], ne = [2048, 12, 12, 1]
            block_1 = ggml_norm(ctx0, block_1, eps);
            block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_2_block_2_1_w), model.mm_model_block_2_block_2_1_b);
            block_1 = ggml_reshape_3d(ctx0, block_1, block_1->ne[0], block_1->ne[1] * block_1->ne[2], block_1->ne[3]);
            // block_1 shape = [1, 144, 2048], ne = [2048, 144, 1]
        }
        embeddings = block_1;
    }
    else {
        GGML_ASSERT(false);
    }
}

// build the graph
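Note: in shape terms the LDP path reads as follows — the 576 CLIP patch embeddings (ne = [1024, 576]) are lifted to [2048, 576] by the two-layer GELU MLP, laid out on a 24x24 grid, then passed through two MobileNet-style blocks (depthwise conv, layer norm, hardswish, a squeeze-and-excitation-style gate built from global average pooling, two pointwise layers with ReLU and hardsigmoid, then a pointwise projection). The first block adds a residual; the second block's stride-2 depthwise conv downsamples 24x24 to 12x12, giving the final ne = [2048, 144] output.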
@ -485,16 +733,55 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
        printf("\n");
    }
    const int n_tensors = gguf_get_n_tensors(ctx);

    // kv
    const int n_kv = gguf_get_n_kv(ctx);
    printf("%s: loaded meta data with %d key-value pairs and %d tensors from %s\n",
        __func__, n_kv, n_tensors, fname);
    {
        std::map<enum ggml_type, uint32_t> n_type;

        uint32_t n_type_max = 0;
        enum ggml_type type_max = GGML_TYPE_F32;

        for (int i = 0; i < n_tensors; i++) {
            enum ggml_type type = gguf_get_tensor_type(ctx, i);

            n_type[type]++;

            if (n_type_max < n_type[type]) {
                n_type_max = n_type[type];
                type_max   = type;
            }
        }

        printf("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
        for (int i = 0; i < n_kv; i++) {
            const char * name           = gguf_get_key(ctx, i);
            const enum gguf_type type   = gguf_get_kv_type(ctx, i);
            const std::string type_name =
                type == GGUF_TYPE_ARRAY
                ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i))
                : gguf_type_name(type);

            std::string value          = gguf_kv_to_str(ctx, i);
            const size_t MAX_VALUE_LEN = 40;
            if (value.size() > MAX_VALUE_LEN) {
                value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
            }
            replace_all(value, "\n", "\\n");

            printf("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str());
        }

        // print type counts
        for (auto & kv : n_type) {
            if (kv.second == 0) {
                continue;
            }

            printf("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second);
        }
    }

    // data
@ -503,20 +790,35 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
        for (int i = 0; i < n_tensors; ++i) {
            const char * name = gguf_get_tensor_name(ctx, i);
            const size_t offset = gguf_get_tensor_offset(ctx, i);
            enum ggml_type type = gguf_get_tensor_type(ctx, i);
            struct ggml_tensor * cur = ggml_get_tensor(meta, name);
            size_t tensor_size = ggml_nbytes(cur);
            buffer_size += tensor_size;
            if (verbosity >= 3) {
                printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%d, %d, %d, %d], type: %d\n", __func__, i,
                       ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], type);
            }
        }
    }

    buffer_size += n_tensors * 128 /* CLIP PADDING */;

    clip_ctx * new_clip = new clip_ctx;

    // update projector type
    {
        int idx = gguf_find_key(ctx, KEY_PROJ_TYPE);
        if (idx != -1) {
            const std::string proj_type = gguf_get_val_str(ctx, idx);
            new_clip->proj_type = clip_projector_type_from_string(proj_type);
        }
        else {
            new_clip->proj_type = PROJECTOR_TYPE_MLP;
        }
    }

#ifdef GGML_USE_CUBLAS
    new_clip->backend = ggml_backend_cuda_init(0);
    printf("%s: CLIP using CUDA backend\n", __func__);
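Note: if `clip.projector_type` holds a name that is not in `PROJECTOR_TYPE_NAMES`, `clip_projector_type_from_string` returns `PROJECTOR_TYPE_UNKNOWN` and loading fails later, in the else branch of the projection-tensor fetch below.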
@ -661,10 +963,45 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
    vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
    vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));

    // LLaVA projection
    if (new_clip->proj_type == PROJECTOR_TYPE_MLP) {
        vision_model.mm_0_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "weight"));
        vision_model.mm_0_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "bias"));
        vision_model.mm_2_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight"));
        vision_model.mm_2_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias"));
    }
    else if (new_clip->proj_type == PROJECTOR_TYPE_LDP) {
        // MobileVLM projection
        vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "weight"));
        vision_model.mm_model_mlp_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "bias"));
        vision_model.mm_model_mlp_3_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 3, "weight"));
        vision_model.mm_model_mlp_3_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 3, "bias"));
        vision_model.mm_model_block_1_block_0_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "0.weight"));
        vision_model.mm_model_block_1_block_0_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.weight"));
        vision_model.mm_model_block_1_block_0_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.bias"));
        vision_model.mm_model_block_1_block_1_fc1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.weight"));
        vision_model.mm_model_block_1_block_1_fc1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.bias"));
        vision_model.mm_model_block_1_block_1_fc2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.weight"));
        vision_model.mm_model_block_1_block_1_fc2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.bias"));
        vision_model.mm_model_block_1_block_2_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "0.weight"));
        vision_model.mm_model_block_1_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.weight"));
        vision_model.mm_model_block_1_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.bias"));
        vision_model.mm_model_block_2_block_0_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "0.weight"));
        vision_model.mm_model_block_2_block_0_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.weight"));
        vision_model.mm_model_block_2_block_0_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.bias"));
        vision_model.mm_model_block_2_block_1_fc1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.weight"));
        vision_model.mm_model_block_2_block_1_fc1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.bias"));
        vision_model.mm_model_block_2_block_1_fc2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.weight"));
        vision_model.mm_model_block_2_block_1_fc2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.bias"));
        vision_model.mm_model_block_2_block_2_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "0.weight"));
        vision_model.mm_model_block_2_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.weight"));
        vision_model.mm_model_block_2_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.bias"));
    }
    else {
        std::string proj_type = PROJECTOR_TYPE_NAMES[new_clip->proj_type];
        throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str()));
    }

    vision_model.layers.resize(hparams.n_layer);
    for (int il = 0; il < hparams.n_layer; ++il) {
@ -1100,13 +1437,25 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
}

int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
    if (ctx->proj_type == PROJECTOR_TYPE_LDP) {
        return ctx->vision_model.mm_model_block_1_block_2_1_b->ne[0];
    }
    else if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
        return ctx->vision_model.mm_2_b->ne[0];
    }
    else {
        std::string proj_type = PROJECTOR_TYPE_NAMES[ctx->proj_type];
        throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str()));
    }
}

int clip_n_patches(const struct clip_ctx * ctx) {
    auto & params = ctx->vision_model.hparams;
    int n_patches = (params.image_size / params.patch_size) * (params.image_size / params.patch_size);
    if (ctx->proj_type == PROJECTOR_TYPE_LDP) {
        n_patches /= 4;
    }
    return n_patches;
}

size_t clip_embd_nbytes(const struct clip_ctx * ctx) {
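Note: as a rough size check (assuming the CLIP ViT-L/14-336 settings used here, `image_size` = 336 and `patch_size` = 14), for an `ldp` model `clip_n_patches` returns 576/4 = 144 and `clip_n_mmproj_embd` reads 2048 from the LDP bias tensor, so a single image's output embedding occupies 144 x 2048 x 4 bytes, roughly 1.1 MiB.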
examples/llava/convert-image-encoder-to-gguf.py
@ -81,6 +81,7 @@ ap.add_argument("--vision-only", action="store_true", required=False,
ap.add_argument("--clip_model_is_vision", action="store_true", required=False,
                help="The clip model is a pure vision model (ShareGPT4V vision extract for example)")
ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.")
ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp", choices=["mlp", "ldp"], default="mlp")
ap.add_argument("--image-mean", nargs=3, type=float, required=False, help="Override image mean values")
ap.add_argument("--image-std", nargs=3, type=float, required=False, help="Override image std values")
ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Default is the original model directory", default=None)
@ -174,6 +175,8 @@ elif args.vision_only and not has_llava_projector:
    fout.add_description("vision-only CLIP model")
elif has_llava_projector:
    fout.add_description("image encoder for LLaVA")
    # add projector type
    fout.add_string("clip.projector_type", args.projector_type)
else:
    fout.add_description("two-tower CLIP model")
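Note: the string written here is the value that `clip_model_load` reads back through the `clip.projector_type` key (`KEY_PROJ_TYPE` in `clip.cpp`); models converted without the flag simply lack the key and default to the `mlp` projector at load time.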
@ -218,7 +221,8 @@ if has_llava_projector:
    projector = torch.load(args.llava_projector)
    for name, data in projector.items():
        name = get_tensor_name(name)
        # pw and dw conv ndim==4
        if data.ndim == 2 or data.ndim == 4:
            data = data.squeeze().numpy().astype(np.float16)
        else:
            data = data.squeeze().numpy().astype(np.float32)
141 ggml.c
@ -1418,6 +1418,9 @@ inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) {
inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
// TODO: optimize performance
inline static void ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
inline static void ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }

static const float GELU_COEF_A     = 0.044715f;
static const float GELU_QUICK_COEF = -1.702f;
@ -1776,9 +1779,11 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
    "GELU",
    "GELU_QUICK",
    "SILU",
    "HARDSWISH",
    "HARDSIGMOID",
};

static_assert(GGML_UNARY_OP_COUNT == 12, "GGML_UNARY_OP_COUNT != 12");


static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
@ -3945,6 +3950,20 @@ struct ggml_tensor * ggml_silu_back(
    return result;
}

// ggml hardswish
struct ggml_tensor * ggml_hardswish(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
}

// ggml hardsigmoid
struct ggml_tensor * ggml_hardsigmoid(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
}

// ggml_norm

static struct ggml_tensor * ggml_norm_impl(
@ -5344,6 +5363,33 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
    return result;
}

// ggml_conv_depthwise
struct ggml_tensor * ggml_conv_depthwise_2d(
    struct ggml_context * ctx,
    struct ggml_tensor * a,
    struct ggml_tensor * b,
    struct ggml_tensor * c,
    int                  s0,
    int                  s1,
    int                  p0,
    int                  p1,
    int                  d0,
    int                  d1) {

    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
                                        s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW]

    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1),                    // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
                ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]));              // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]

    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]

    return result;
}
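Note: to put concrete numbers on the shape comments, take the LDP projector's first depthwise call in `clip.cpp`: the input `b` has ne = [24, 24, 2048, 1] and, going by the size-preserving stride-1/padding-1/dilation-1 call, the per-channel kernel is 3x3. `ggml_im2col` then produces ne = [9, 24, 24, 2048], the batched `ggml_mul_mat` contracts the 9-element kernel axis independently per channel, and the final reshape restores ne = [24, 24, 2048, 1].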
// ggml_conv_2d

// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
@ -9333,6 +9379,87 @@ static void ggml_compute_forward_silu_back(
    }
}


static void ggml_compute_forward_hardswish_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst) {
    assert(params->ith == 0);
    assert(ggml_are_same_shape(src0, dst));

    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    const int n  = ggml_nrows(src0);
    const int nc = src0->ne[0];

    assert(dst->nb[0]  == sizeof(float));
    assert(src0->nb[0] == sizeof(float));

    for (int i = 0; i < n; i++) {
        ggml_vec_hardswish_f32(nc,
                (float *) ((char *) dst->data  + i*( dst->nb[1])),
                (float *) ((char *) src0->data + i*(src0->nb[1])));
    }
}
static void ggml_compute_forward_hardswish(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst) {
    switch (src0->type) {
        case GGML_TYPE_F32:
            {
                ggml_compute_forward_hardswish_f32(params, src0, dst);
            } break;
        default:
            {
                GGML_ASSERT(false);
            } break;
    }
}

static void ggml_compute_forward_hardsigmoid_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst) {
    assert(params->ith == 0);
    assert(ggml_are_same_shape(src0, dst));

    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    const int n  = ggml_nrows(src0);
    const int nc = src0->ne[0];

    assert(dst->nb[0]  == sizeof(float));
    assert(src0->nb[0] == sizeof(float));

    for (int i = 0; i < n; i++) {
        ggml_vec_hardsigmoid_f32(nc,
                (float *) ((char *) dst->data  + i*( dst->nb[1])),
                (float *) ((char *) src0->data + i*(src0->nb[1])));
    }
}

static void ggml_compute_forward_hardsigmoid(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst) {
    switch (src0->type) {
        case GGML_TYPE_F32:
            {
                ggml_compute_forward_hardsigmoid_f32(params, src0, dst);
            } break;
        default:
            {
                GGML_ASSERT(false);
            } break;
    }
}


// ggml_compute_forward_norm

static void ggml_compute_forward_norm_f32(
@ -12349,6 +12476,7 @@ static void ggml_compute_forward_im2col(
    }
}


// ggml_compute_forward_conv_transpose_2d

static void ggml_compute_forward_conv_transpose_2d(
@ -13917,6 +14045,14 @@ static void ggml_compute_forward_unary(
            {
                ggml_compute_forward_silu(params, src0, dst);
            } break;
        case GGML_UNARY_OP_HARDSWISH:
            {
                ggml_compute_forward_hardswish(params, src0, dst);
            } break;
        case GGML_UNARY_OP_HARDSIGMOID:
            {
                ggml_compute_forward_hardsigmoid(params, src0, dst);
            } break;
        default:
            {
                GGML_ASSERT(false);
@ -16330,6 +16466,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                case GGML_UNARY_OP_TANH:
                case GGML_UNARY_OP_ELU:
                case GGML_UNARY_OP_RELU:
                case GGML_UNARY_OP_HARDSWISH: // to opt for multiple threads
                case GGML_UNARY_OP_HARDSIGMOID: // to opt for multiple threads
                    {
                        n_tasks = 1;
                    } break;
@ -16562,7 +16700,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        // distribute new work or execute it direct if 1T
        while (++node_n < cgraph->n_nodes) {
            GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
            struct ggml_tensor * node = cgraph->nodes[node_n];
            const int n_tasks = ggml_get_n_tasks(node, n_threads);
24 ggml.h
@ -489,6 +489,8 @@ extern "C" {
        GGML_UNARY_OP_GELU,
        GGML_UNARY_OP_GELU_QUICK,
        GGML_UNARY_OP_SILU,
        GGML_UNARY_OP_HARDSWISH,
        GGML_UNARY_OP_HARDSIGMOID,

        GGML_UNARY_OP_COUNT,
    };
@ -1032,6 +1034,16 @@ extern "C" {
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // hardswish(x) = x * relu6(x + 3) / 6
    GGML_API struct ggml_tensor * ggml_hardswish(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // hardsigmoid(x) = relu6(x + 3) / 6
    GGML_API struct ggml_tensor * ggml_hardsigmoid(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // normalize along rows
    GGML_API struct ggml_tensor * ggml_norm(
            struct ggml_context * ctx,
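Note: both ops go through the generic `ggml_unary` path, so they are driven like any other ggml unary. A minimal CPU-only sketch of using `ggml_hardswish` through the public API (not part of this commit; the buffer size and input values are arbitrary, and the graph helpers used are the ones present in ggml at the time of this change):

```c
#include "ggml.h"
#include <stdio.h>

int main(void) {
    // small context with ggml-managed allocation (no_alloc = false)
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // a 4-element f32 input tensor
    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    float * xd = (float *) x->data;
    xd[0] = -4.0f; xd[1] = -1.0f; xd[2] = 1.0f; xd[3] = 4.0f;

    // hardswish(x) = x * relu6(x + 3) / 6 (see the declaration above)
    struct ggml_tensor * y = ggml_hardswish(ctx, x);

    // build and run the forward graph on a single thread
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    for (int i = 0; i < 4; ++i) {
        printf("hardswish(%4.1f) = %7.4f\n", xd[i], ((float *) y->data)[i]);
    }

    ggml_free(ctx);
    return 0;
}
```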
@ -1483,6 +1495,18 @@ extern "C" {
            int                   d1,
            bool                  is_2D);

    GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * c,
            int                  s0,
            int                  s1,
            int                  p0,
            int                  p1,
            int                  d0,
            int                  d1);

    GGML_API struct ggml_tensor * ggml_conv_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,