mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-10 18:51:45 +00:00
This commit is contained in:
parent
dddf3771c2
commit
307ef9a588
@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
|
|||||||
};
|
};
|
||||||
|
|
||||||
postPatch = ''
|
postPatch = ''
|
||||||
substituteInPlace ./ggml/src/ggml-metal.m \
|
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
|
||||||
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
|
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
|
||||||
substituteInPlace ./ggml/src/ggml-metal.m \
|
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
|
||||||
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
|
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
|
||||||
'';
|
'';
|
||||||
|
|
||||||
|
85
Makefile
85
Makefile
@ -523,11 +523,11 @@ ifndef GGML_NO_ACCELERATE
|
|||||||
# Mac OS - include Accelerate framework.
|
# Mac OS - include Accelerate framework.
|
||||||
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
|
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
|
||||||
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
||||||
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
||||||
MK_LDFLAGS += -framework Accelerate
|
MK_LDFLAGS += -framework Accelerate
|
||||||
OBJ_GGML += ggml/src/ggml-blas.o
|
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
||||||
endif
|
endif
|
||||||
endif # GGML_NO_ACCELERATE
|
endif # GGML_NO_ACCELERATE
|
||||||
|
|
||||||
@ -552,36 +552,36 @@ ifdef GGML_OPENBLAS
|
|||||||
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
||||||
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
||||||
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
||||||
OBJ_GGML += ggml/src/ggml-blas.o
|
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
|
||||||
endif # GGML_OPENBLAS
|
endif # GGML_OPENBLAS
|
||||||
|
|
||||||
ifdef GGML_OPENBLAS64
|
ifdef GGML_OPENBLAS64
|
||||||
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
||||||
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
||||||
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
||||||
OBJ_GGML += ggml/src/ggml-blas.o
|
OBJ_GGML += src/ggml-blas/ggml-blas.o
|
||||||
endif # GGML_OPENBLAS64
|
endif # GGML_OPENBLAS64
|
||||||
|
|
||||||
ifdef GGML_BLIS
|
ifdef GGML_BLIS
|
||||||
MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
|
MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
|
||||||
MK_LDFLAGS += -lblis -L/usr/local/lib
|
MK_LDFLAGS += -lblis -L/usr/local/lib
|
||||||
OBJ_GGML += ggml/src/ggml-blas.o
|
OBJ_GGML += src/ggml-blas/ggml-blas.o
|
||||||
endif # GGML_BLIS
|
endif # GGML_BLIS
|
||||||
|
|
||||||
ifdef GGML_NVPL
|
ifdef GGML_NVPL
|
||||||
MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
|
MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
|
||||||
MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
|
MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
|
||||||
OBJ_GGML += ggml/src/ggml-blas.o
|
OBJ_GGML += src/ggml-blas/ggml-blas.o
|
||||||
endif # GGML_NVPL
|
endif # GGML_NVPL
|
||||||
|
|
||||||
ifndef GGML_NO_LLAMAFILE
|
ifndef GGML_NO_LLAMAFILE
|
||||||
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
|
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
|
||||||
OBJ_GGML += ggml/src/llamafile/sgemm.o
|
OBJ_GGML += ggml/src/ggml-cpu/llamafile/sgemm.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef GGML_NO_AMX
|
ifndef GGML_NO_AMX
|
||||||
MK_CPPFLAGS += -DGGML_USE_AMX
|
MK_CPPFLAGS += -DGGML_USE_AMX
|
||||||
OBJ_GGML += ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o
|
OBJ_GGML += ggml/src/ggml-amx/ggml-amx.o ggml/src/ggml-amx/mmq.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef GGML_RPC
|
ifdef GGML_RPC
|
||||||
@ -623,7 +623,7 @@ ifdef GGML_CUDA
|
|||||||
MK_NVCCFLAGS += -use_fast_math
|
MK_NVCCFLAGS += -use_fast_math
|
||||||
endif # GGML_MUSA
|
endif # GGML_MUSA
|
||||||
|
|
||||||
OBJ_GGML += ggml/src/ggml-cuda.o
|
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
|
||||||
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
||||||
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
||||||
|
|
||||||
@ -742,8 +742,8 @@ ggml/src/ggml-cuda/%.o: \
|
|||||||
ggml/src/ggml-cuda/common.cuh
|
ggml/src/ggml-cuda/common.cuh
|
||||||
$(NVCC_COMPILE)
|
$(NVCC_COMPILE)
|
||||||
|
|
||||||
ggml/src/ggml-cuda.o: \
|
ggml/src/ggml-cuda/ggml-cuda.o: \
|
||||||
ggml/src/ggml-cuda.cu \
|
ggml/src/ggml-cuda/ggml-cuda.cu \
|
||||||
ggml/include/ggml-cuda.h \
|
ggml/include/ggml-cuda.h \
|
||||||
ggml/include/ggml.h \
|
ggml/include/ggml.h \
|
||||||
ggml/include/ggml-backend.h \
|
ggml/include/ggml-backend.h \
|
||||||
@ -852,12 +852,12 @@ ifdef GGML_CUDA_NO_PEER_COPY
|
|||||||
HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
|
HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
|
||||||
endif # GGML_CUDA_NO_PEER_COPY
|
endif # GGML_CUDA_NO_PEER_COPY
|
||||||
|
|
||||||
OBJ_GGML += ggml/src/ggml-cuda.o
|
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
|
||||||
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
||||||
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
||||||
|
|
||||||
ggml/src/ggml-cuda.o: \
|
ggml/src/ggml-cuda/ggml-cuda.o: \
|
||||||
ggml/src/ggml-cuda.cu \
|
ggml/src/ggml-cuda/ggml-cuda.cu \
|
||||||
ggml/include/ggml-cuda.h \
|
ggml/include/ggml-cuda.h \
|
||||||
ggml/include/ggml.h \
|
ggml/include/ggml.h \
|
||||||
ggml/include/ggml-backend.h \
|
ggml/include/ggml-backend.h \
|
||||||
@ -877,7 +877,7 @@ endif # GGML_HIPBLAS
|
|||||||
ifdef GGML_METAL
|
ifdef GGML_METAL
|
||||||
MK_CPPFLAGS += -DGGML_USE_METAL
|
MK_CPPFLAGS += -DGGML_USE_METAL
|
||||||
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
||||||
OBJ_GGML += ggml/src/ggml-metal.o
|
OBJ_GGML += ggml/src/ggml-metal/ggml-metal.o
|
||||||
|
|
||||||
ifdef GGML_METAL_USE_BF16
|
ifdef GGML_METAL_USE_BF16
|
||||||
MK_CPPFLAGS += -DGGML_METAL_USE_BF16
|
MK_CPPFLAGS += -DGGML_METAL_USE_BF16
|
||||||
@ -892,18 +892,18 @@ endif
|
|||||||
endif # GGML_METAL
|
endif # GGML_METAL
|
||||||
|
|
||||||
ifdef GGML_METAL
|
ifdef GGML_METAL
|
||||||
ggml/src/ggml-metal.o: \
|
ggml/src/ggml-metal/ggml-metal.o: \
|
||||||
ggml/src/ggml-metal.m \
|
ggml/src/ggml-metal/ggml-metal.m \
|
||||||
ggml/include/ggml-metal.h \
|
ggml/include/ggml-metal.h \
|
||||||
ggml/include/ggml.h
|
ggml/include/ggml.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ifdef GGML_METAL_EMBED_LIBRARY
|
ifdef GGML_METAL_EMBED_LIBRARY
|
||||||
ggml/src/ggml-metal-embed.o: \
|
ggml/src/ggml-metal-embed.o: \
|
||||||
ggml/src/ggml-metal.metal \
|
ggml/src/ggml-metal/ggml-metal.metal \
|
||||||
ggml/src/ggml-common.h
|
ggml/src/ggml-common.h
|
||||||
@echo "Embedding Metal library"
|
@echo "Embedding Metal library"
|
||||||
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
|
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal
|
||||||
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
||||||
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
@ -919,11 +919,16 @@ endif # GGML_METAL
|
|||||||
|
|
||||||
OBJ_GGML += \
|
OBJ_GGML += \
|
||||||
ggml/src/ggml.o \
|
ggml/src/ggml.o \
|
||||||
ggml/src/ggml-cpu.o \
|
ggml/src/ggml-aarch64.o \
|
||||||
ggml/src/ggml-alloc.o \
|
ggml/src/ggml-alloc.o \
|
||||||
ggml/src/ggml-backend.o \
|
ggml/src/ggml-backend.o \
|
||||||
|
ggml/src/ggml-backend-reg.o \
|
||||||
ggml/src/ggml-quants.o \
|
ggml/src/ggml-quants.o \
|
||||||
ggml/src/ggml-aarch64.o
|
ggml/src/ggml-threading.o \
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu.o \
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu-cpp.o \
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu-aarch64.o \
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu-quants.o
|
||||||
|
|
||||||
OBJ_LLAMA = \
|
OBJ_LLAMA = \
|
||||||
src/llama.o \
|
src/llama.o \
|
||||||
@ -1051,12 +1056,23 @@ ggml/src/ggml.o: \
|
|||||||
ggml/include/ggml.h
|
ggml/include/ggml.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ggml/src/ggml-cpu.o: \
|
ggml/src/ggml-threading.o: \
|
||||||
ggml/src/ggml-cpu.c \
|
ggml/src/ggml-threading.cpp \
|
||||||
|
ggml/include/ggml.h
|
||||||
|
$(CXX) $(XXCFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu.o: \
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu.c \
|
||||||
ggml/include/ggml.h \
|
ggml/include/ggml.h \
|
||||||
ggml/src/ggml-common.h
|
ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu.cpp \
|
||||||
|
ggml/include/ggml.h \
|
||||||
|
ggml/src/ggml-common.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ggml/src/ggml-alloc.o: \
|
ggml/src/ggml-alloc.o: \
|
||||||
ggml/src/ggml-alloc.c \
|
ggml/src/ggml-alloc.c \
|
||||||
ggml/include/ggml.h \
|
ggml/include/ggml.h \
|
||||||
@ -1084,22 +1100,22 @@ ggml/src/ggml-aarch64.o: \
|
|||||||
ggml/src/ggml-common.h
|
ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ggml/src/ggml-blas.o: \
|
ggml/src/ggml-blas/ggml-blas.o: \
|
||||||
ggml/src/ggml-blas.cpp \
|
ggml/src/ggml-blas/ggml-blas.cpp \
|
||||||
ggml/include/ggml-blas.h
|
ggml/include/ggml-blas.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ifndef GGML_NO_LLAMAFILE
|
ifndef GGML_NO_LLAMAFILE
|
||||||
ggml/src/llamafile/sgemm.o: \
|
ggml/src/ggml-cpu/llamafile/sgemm.o: \
|
||||||
ggml/src/llamafile/sgemm.cpp \
|
ggml/src/ggml-cpu/llamafile/sgemm.cpp \
|
||||||
ggml/src/llamafile/sgemm.h \
|
ggml/src/ggml-cpu/llamafile/sgemm.h \
|
||||||
ggml/include/ggml.h
|
ggml/include/ggml.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@ -I ggml/src -I ggml/src/ggml-cpu
|
||||||
endif # GGML_NO_LLAMAFILE
|
endif # GGML_NO_LLAMAFILE
|
||||||
|
|
||||||
ifndef GGML_NO_AMX
|
ifndef GGML_NO_AMX
|
||||||
ggml/src/ggml-amx.o: \
|
ggml/src/ggml-amx/ggml-amx.o: \
|
||||||
ggml/src/ggml-amx.cpp \
|
ggml/src/ggml-amx/ggml-amx.cpp \
|
||||||
ggml/include/ggml-amx.h
|
ggml/include/ggml-amx.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
@ -1250,10 +1266,11 @@ clean:
|
|||||||
rm -rvf ggml/*.a
|
rm -rvf ggml/*.a
|
||||||
rm -rvf ggml/*.dll
|
rm -rvf ggml/*.dll
|
||||||
rm -rvf ggml/*.so
|
rm -rvf ggml/*.so
|
||||||
rm -vrf ggml/src/*.o
|
rm -rvf ggml/src/*.o
|
||||||
rm -rvf ggml/src/llamafile/*.o
|
rm -rvf ggml/src/ggml-cpu/*.o
|
||||||
|
rm -rvf ggml/src/ggml-cpu/llamafile/*.o
|
||||||
rm -rvf common/build-info.cpp
|
rm -rvf common/build-info.cpp
|
||||||
rm -vrf ggml/src/ggml-metal-embed.metal
|
rm -vrf ggml/src/ggml-metal/ggml-metal-embed.metal
|
||||||
rm -vrf ggml/src/ggml-cuda/*.o
|
rm -vrf ggml/src/ggml-cuda/*.o
|
||||||
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
|
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
|
||||||
rm -vrf ggml/src/ggml-amx/*.o
|
rm -vrf ggml/src/ggml-amx/*.o
|
||||||
|
@ -30,8 +30,8 @@ var cSettings: [CSetting] = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
#if canImport(Darwin)
|
#if canImport(Darwin)
|
||||||
sources.append("ggml/src/ggml-metal.m")
|
sources.append("ggml/src/ggml-metal/ggml-metal.m")
|
||||||
resources.append(.process("ggml/src/ggml-metal.metal"))
|
resources.append(.process("ggml/src/ggml-metal/ggml-metal.metal"))
|
||||||
linkerSettings.append(.linkedFramework("Accelerate"))
|
linkerSettings.append(.linkedFramework("Accelerate"))
|
||||||
cSettings.append(
|
cSettings.append(
|
||||||
contentsOf: [
|
contentsOf: [
|
||||||
|
@ -421,9 +421,18 @@ ggml_backend_reg_t ggml_backend_amx_reg(void) {
|
|||||||
|
|
||||||
#else // if defined(__AMX_INT8__)
|
#else // if defined(__AMX_INT8__)
|
||||||
|
|
||||||
|
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ggml_backend_is_amx(ggml_backend_t backend) {
|
||||||
|
GGML_UNUSED(backend);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
ggml_backend_t ggml_backend_amx_init(void) {
|
ggml_backend_t ggml_backend_amx_init(void) {
|
||||||
fprintf(stderr, "GGML is not compiled with AMX support!\n");
|
fprintf(stderr, "GGML is not compiled with AMX support!\n");
|
||||||
return ggml_backend_t{};
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
|
void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
|
||||||
|
@ -44,7 +44,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GGML_USE_LLAMAFILE
|
#ifdef GGML_USE_LLAMAFILE
|
||||||
#include <llamafile/sgemm.h>
|
#include "llamafile/sgemm.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
|
@ -22,6 +22,10 @@
|
|||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__F16C__)
|
||||||
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
@ -15,8 +15,8 @@ using namespace metal;
|
|||||||
// ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
|
// ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
|
||||||
//
|
//
|
||||||
// cmd:
|
// cmd:
|
||||||
// .../usr/bin/metal -dM -E -c ggml/src/ggml-metal.metal
|
// .../usr/bin/metal -dM -E -c ggml/src/ggml-metal/ggml-metal.metal
|
||||||
// .../usr/bin/metal -dM -E -c -target air64-apple-ios14.0 ggml/src/ggml-metal.metal
|
// .../usr/bin/metal -dM -E -c -target air64-apple-ios14.0 ggml/src/ggml-metal/ggml-metal.metal
|
||||||
//
|
//
|
||||||
#if __METAL_VERSION__ < 310 && defined(GGML_METAL_USE_BF16)
|
#if __METAL_VERSION__ < 310 && defined(GGML_METAL_USE_BF16)
|
||||||
#undef GGML_METAL_USE_BF16
|
#undef GGML_METAL_USE_BF16
|
||||||
|
@ -368,7 +368,7 @@ void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
|
|||||||
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
|
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
|
||||||
int64_t i = 0;
|
int64_t i = 0;
|
||||||
#if defined(__F16C__)
|
#if defined(__F16C__)
|
||||||
if (ggml_cpu_has_f16c()) {
|
//if (ggml_cpu_has_f16c()) {
|
||||||
for (; i + 7 < n; i += 8) {
|
for (; i + 7 < n; i += 8) {
|
||||||
__m256 x_vec = _mm256_loadu_ps(x + i);
|
__m256 x_vec = _mm256_loadu_ps(x + i);
|
||||||
__m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
__m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||||
@ -379,7 +379,7 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
|
|||||||
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||||
_mm_storel_epi64((__m128i *)(y + i), y_vec);
|
_mm_storel_epi64((__m128i *)(y + i), y_vec);
|
||||||
}
|
}
|
||||||
}
|
//}
|
||||||
#endif
|
#endif
|
||||||
for (; i < n; i++) {
|
for (; i < n; i++) {
|
||||||
y[i] = GGML_FP32_TO_FP16(x[i]);
|
y[i] = GGML_FP32_TO_FP16(x[i]);
|
||||||
@ -389,7 +389,7 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
|
|||||||
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
|
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
|
||||||
int64_t i = 0;
|
int64_t i = 0;
|
||||||
#if defined(__AVX512F__)
|
#if defined(__AVX512F__)
|
||||||
if (ggml_cpu_has_avx512()) {
|
//if (ggml_cpu_has_avx512()) {
|
||||||
for (; i + 16 <= n; i += 16) {
|
for (; i + 16 <= n; i += 16) {
|
||||||
_mm512_storeu_ps(y + i,
|
_mm512_storeu_ps(y + i,
|
||||||
_mm512_castsi512_ps(
|
_mm512_castsi512_ps(
|
||||||
@ -399,10 +399,10 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
|
|||||||
(const __m256i *)(x + i))),
|
(const __m256i *)(x + i))),
|
||||||
16)));
|
16)));
|
||||||
}
|
}
|
||||||
}
|
//}
|
||||||
#endif
|
#endif
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
if (ggml_cpu_has_avx2()) {
|
//if (ggml_cpu_has_avx2()) {
|
||||||
for (; i + 8 <= n; i += 8) {
|
for (; i + 8 <= n; i += 8) {
|
||||||
_mm256_storeu_ps(y + i,
|
_mm256_storeu_ps(y + i,
|
||||||
_mm256_castsi256_ps(
|
_mm256_castsi256_ps(
|
||||||
@ -412,7 +412,7 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
|
|||||||
(const __m128i *)(x + i))),
|
(const __m128i *)(x + i))),
|
||||||
16)));
|
16)));
|
||||||
}
|
}
|
||||||
}
|
//}
|
||||||
#endif
|
#endif
|
||||||
for (; i < n; i++) {
|
for (; i < n; i++) {
|
||||||
y[i] = GGML_BF16_TO_FP32(x[i]);
|
y[i] = GGML_BF16_TO_FP32(x[i]);
|
||||||
|
@ -237,7 +237,6 @@ int main(int argc, char** argv) {
|
|||||||
int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
|
int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
|
||||||
int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
|
int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
|
||||||
|
|
||||||
const auto * funcs = ggml_get_type_traits(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0);
|
|
||||||
const auto * funcs_cpu = ggml_get_type_traits_cpu(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0);
|
const auto * funcs_cpu = ggml_get_type_traits_cpu(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0);
|
||||||
|
|
||||||
std::vector<block_q4_0> q40;
|
std::vector<block_q4_0> q40;
|
||||||
|
Loading…
Reference in New Issue
Block a user