mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-15 15:29:53 +00:00
8f275a7c45
Some checks are pending
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-musa.Dockerfile platforms:linux/amd64 tag:full-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-musa.Dockerfile platforms:linux/amd64 tag:light-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-musa.Dockerfile platforms:linux/amd64 tag:server-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
Python Type-Check / pyright type-check (push) Waiting to run
* ggml: Add POOL2D OP for GPU ACC to the Vulkan. - The MobileVLM model now supports inference acceleration through GPU by utilizing the Vulkan backend. - A GGML_OP_POOL_2D shader has been added. (Pooling) - The encoding performance of the CLIP model improved from 2.8s on the CPU to 0.7s on the GPU. Signed-off-by: Changyeon Kim <cyzero.kim@samsung.com> * [fix] Correct the incorrect order of the parameters. fix casting to int. Signed-off-by: Changyeon Kim <cyzero.kim@samsung.com> --------- Signed-off-by: Changyeon Kim <cyzero.kim@samsung.com>
75 lines
1.7 KiB
Plaintext
75 lines
1.7 KiB
Plaintext
#version 450
|
|
|
|
#include "types.comp"
|
|
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
|
|
// Push constants for the 2D pooling kernel.
// Member order and types define the push-constant layout; keep them unchanged
// unless the code that uploads them is changed to match.
layout(push_constant) uniform parameter {
    uint IW;        // input width (also the row stride used to index data_a)
    uint IH;        // input height
    uint OW;        // output width
    uint OH;        // output height
    uint OC;        // not referenced in this file; NOTE(review): presumably a channel count - confirm
    uint pelements; // number of output elements; invocations with idx >= pelements exit early
    uint op;        // pooling mode, compared against OP_POOL_MAX / OP_POOL_AVG
    int k0;         // kernel extent along the height axis (as used by main())
    int k1;         // kernel extent along the width axis (as used by main())
    int s0;         // stride along the height axis
    int s1;         // stride along the width axis
    int p0;         // padding subtracted along the height axis
    int p1;         // padding subtracted along the width axis
} p;
|
|
|
|
// Number of invocations per workgroup along x (see local_size_x below).
#define BLOCK_SIZE 512
// Largest finite 32-bit float; its negation seeds the max-pooling accumulator.
#define FLT_MAX 3.402823466e+38F
// Values of the `op` push constant understood by this shader.
// NOTE(review): presumably these mirror a host-side enum - confirm.
#define OP_POOL_MAX 0u
#define OP_POOL_AVG 1u

// One-dimensional workgroup: BLOCK_SIZE x 1 x 1 invocations.
layout (local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in;

// A_TYPE and D_TYPE are not defined in this file.
// NOTE(review): presumably supplied by the build or by types.comp - confirm.

// Input tensor, read-only.
layout(binding = 0) readonly buffer X {
    A_TYPE data_a[];
};

// Output tensor, write-only.
layout(binding = 1) writeonly buffer D {
    D_TYPE data_d[];
};
|
|
|
|
// 2D pooling (max or average). Each invocation produces exactly one output element.
//
// The flat invocation index is decomposed into (nc, cur_oh, cur_ow), where nc selects
// an IH x IW input plane and the matching OH x OW output plane. The pooling window for
// that output position is clipped to the input bounds and reduced:
//   - OP_POOL_AVG: sum of the in-bounds samples, each scaled by 1 / (k0 * k1). The
//     divisor is the full kernel area, so positions that fall in the padding
//     contribute zero.
//   - OP_POOL_MAX: maximum of the in-bounds samples, starting from -FLT_MAX.
// Any other value of p.op makes the invocation return without writing.
void main() {
    const uint idx = gl_GlobalInvocationID.x;
    if (idx >= p.pelements) {
        // Surplus invocation in the last workgroup: nothing to compute.
        return;
    }

    const uint O_HW = p.OW * p.OH;

    // idx == nc * O_HW + cur_oh * OW + cur_ow
    const uint nc       = idx / O_HW;
    const uint plane_ix = idx % O_HW;
    const uint cur_oh   = plane_ix / p.OW;
    const uint cur_ow   = plane_ix % p.OW;

    // Window bounds are computed in signed arithmetic. start_* can be negative because
    // of padding; if start + k were implicitly converted to uint while negative it
    // would wrap to a huge value and clamp to the full input extent. Keeping the
    // arithmetic signed makes a window that lies entirely in the padding an empty
    // range, so the loops below simply do not execute.
    const int start_h = int(cur_oh) * p.s0 - p.p0;
    const int bh      = max(start_h, 0);
    const int eh      = min(start_h + p.k0, int(p.IH));

    const int start_w = int(cur_ow) * p.s1 - p.p1;
    const int bw      = max(start_w, 0);
    const int ew      = min(start_w + p.k1, int(p.IW));

    const float scale = 1.0 / float(p.k0 * p.k1);
    float res;

    if (p.op == OP_POOL_AVG) {
        res = 0.0;
    } else if (p.op == OP_POOL_MAX) {
        res = -FLT_MAX;
    } else {
        // Unknown pooling mode: leave the output element untouched.
        return;
    }

    // Offset of the first element of input plane nc.
    const uint plane_base = nc * p.IH * p.IW;

    #pragma unroll
    for (int i = bh; i < eh; i++) {
        // i >= bh >= 0 here, so the conversion to uint cannot wrap.
        const uint row_base = plane_base + uint(i) * p.IW;

        #pragma unroll
        for (int j = bw; j < ew; j++) {
            // Accumulate in 32-bit float regardless of the storage types.
            const float cur = float(data_a[row_base + uint(j)]);

            if (p.op == OP_POOL_AVG) {
                res += cur * scale;
            } else if (p.op == OP_POOL_MAX) {
                res = max(res, cur);
            }
        }
    }

    // Explicit conversion so the store does not rely on an implicit float -> D_TYPE conversion.
    data_d[nc * O_HW + cur_oh * p.OW + cur_ow] = D_TYPE(res);
}
|