diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index cd6146421..b88e6ca80 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: { }; postPatch = '' - substituteInPlace ./ggml/src/ggml-metal.m \ + substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - substituteInPlace ./ggml/src/ggml-metal.m \ + substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" ''; diff --git a/Makefile b/Makefile index fc6ef46cd..5ec996c87 100644 --- a/Makefile +++ b/Makefile @@ -523,11 +523,11 @@ ifndef GGML_NO_ACCELERATE # Mac OS - include Accelerate framework. # `-framework Accelerate` works both with Apple Silicon and Mac Intel ifeq ($(UNAME_S),Darwin) - MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS + MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64 MK_LDFLAGS += -framework Accelerate - OBJ_GGML += ggml/src/ggml-blas.o + OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o endif endif # GGML_NO_ACCELERATE @@ -552,36 +552,36 @@ ifdef GGML_OPENBLAS MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas) MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas) MK_LDFLAGS += $(shell pkg-config --libs openblas) - OBJ_GGML += ggml/src/ggml-blas.o + OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o endif # GGML_OPENBLAS ifdef GGML_OPENBLAS64 MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64) MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64) MK_LDFLAGS += $(shell pkg-config --libs openblas64) - OBJ_GGML += ggml/src/ggml-blas.o + OBJ_GGML += src/ggml-blas/ggml-blas.o endif # GGML_OPENBLAS64 ifdef GGML_BLIS MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis MK_LDFLAGS += -lblis -L/usr/local/lib - OBJ_GGML += ggml/src/ggml-blas.o + OBJ_GGML += src/ggml-blas/ggml-blas.o endif # GGML_BLIS ifdef GGML_NVPL MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp - OBJ_GGML += ggml/src/ggml-blas.o + OBJ_GGML += src/ggml-blas/ggml-blas.o endif # GGML_NVPL ifndef GGML_NO_LLAMAFILE MK_CPPFLAGS += -DGGML_USE_LLAMAFILE - OBJ_GGML += ggml/src/llamafile/sgemm.o + OBJ_GGML += ggml/src/ggml-cpu/llamafile/sgemm.o endif ifndef GGML_NO_AMX MK_CPPFLAGS += -DGGML_USE_AMX - OBJ_GGML += ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o + OBJ_GGML += ggml/src/ggml-amx/ggml-amx.o ggml/src/ggml-amx/mmq.o endif ifdef GGML_RPC @@ -623,7 +623,7 @@ ifdef GGML_CUDA MK_NVCCFLAGS += -use_fast_math endif # GGML_MUSA - OBJ_GGML += ggml/src/ggml-cuda.o + OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) OBJ_GGML += $(OBJ_CUDA_TMPL) @@ -742,8 +742,8 @@ ggml/src/ggml-cuda/%.o: \ ggml/src/ggml-cuda/common.cuh $(NVCC_COMPILE) -ggml/src/ggml-cuda.o: \ - ggml/src/ggml-cuda.cu \ +ggml/src/ggml-cuda/ggml-cuda.o: \ + ggml/src/ggml-cuda/ggml-cuda.cu \ ggml/include/ggml-cuda.h \ ggml/include/ggml.h \ ggml/include/ggml-backend.h \ @@ -852,12 +852,12 @@ ifdef GGML_CUDA_NO_PEER_COPY HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY endif # GGML_CUDA_NO_PEER_COPY - OBJ_GGML += ggml/src/ggml-cuda.o + OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) OBJ_GGML += $(OBJ_CUDA_TMPL) -ggml/src/ggml-cuda.o: \ - ggml/src/ggml-cuda.cu \ +ggml/src/ggml-cuda/ggml-cuda.o: \ + ggml/src/ggml-cuda/ggml-cuda.cu \ ggml/include/ggml-cuda.h \ ggml/include/ggml.h \ ggml/include/ggml-backend.h \ @@ -877,7 +877,7 @@ endif # GGML_HIPBLAS ifdef GGML_METAL MK_CPPFLAGS += -DGGML_USE_METAL MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit - OBJ_GGML += ggml/src/ggml-metal.o + OBJ_GGML += ggml/src/ggml-metal/ggml-metal.o ifdef GGML_METAL_USE_BF16 MK_CPPFLAGS += -DGGML_METAL_USE_BF16 @@ -892,18 +892,18 @@ endif endif # GGML_METAL ifdef GGML_METAL -ggml/src/ggml-metal.o: \ - ggml/src/ggml-metal.m \ +ggml/src/ggml-metal/ggml-metal.o: \ + ggml/src/ggml-metal/ggml-metal.m \ ggml/include/ggml-metal.h \ ggml/include/ggml.h $(CC) $(CFLAGS) -c $< -o $@ ifdef GGML_METAL_EMBED_LIBRARY ggml/src/ggml-metal-embed.o: \ - ggml/src/ggml-metal.metal \ + ggml/src/ggml-metal/ggml-metal.metal \ ggml/src/ggml-common.h @echo "Embedding Metal library" - @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal + @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal $(eval TEMP_ASSEMBLY=$(shell mktemp -d)) @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s @@ -919,11 +919,16 @@ endif # GGML_METAL OBJ_GGML += \ ggml/src/ggml.o \ - ggml/src/ggml-cpu.o \ + ggml/src/ggml-aarch64.o \ ggml/src/ggml-alloc.o \ ggml/src/ggml-backend.o \ + ggml/src/ggml-backend-reg.o \ ggml/src/ggml-quants.o \ - ggml/src/ggml-aarch64.o + ggml/src/ggml-threading.o \ + ggml/src/ggml-cpu/ggml-cpu.o \ + ggml/src/ggml-cpu/ggml-cpu-cpp.o \ + ggml/src/ggml-cpu/ggml-cpu-aarch64.o \ + ggml/src/ggml-cpu/ggml-cpu-quants.o OBJ_LLAMA = \ src/llama.o \ @@ -1051,12 +1056,23 @@ ggml/src/ggml.o: \ ggml/include/ggml.h $(CC) $(CFLAGS) -c $< -o $@ -ggml/src/ggml-cpu.o: \ - ggml/src/ggml-cpu.c \ +ggml/src/ggml-threading.o: \ + ggml/src/ggml-threading.cpp \ + ggml/include/ggml.h + $(CXX) $(XXCFLAGS) -c $< -o $@ + +ggml/src/ggml-cpu/ggml-cpu.o: \ + ggml/src/ggml-cpu/ggml-cpu.c \ ggml/include/ggml.h \ ggml/src/ggml-common.h $(CC) $(CFLAGS) -c $< -o $@ +ggml/src/ggml-cpu/ggml-cpu-cpp.o: \ + ggml/src/ggml-cpu/ggml-cpu.cpp \ + ggml/include/ggml.h \ + ggml/src/ggml-common.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + ggml/src/ggml-alloc.o: \ ggml/src/ggml-alloc.c \ ggml/include/ggml.h \ @@ -1084,22 +1100,22 @@ ggml/src/ggml-aarch64.o: \ ggml/src/ggml-common.h $(CC) $(CFLAGS) -c $< -o $@ -ggml/src/ggml-blas.o: \ - ggml/src/ggml-blas.cpp \ +ggml/src/ggml-blas/ggml-blas.o: \ + ggml/src/ggml-blas/ggml-blas.cpp \ ggml/include/ggml-blas.h $(CXX) $(CXXFLAGS) -c $< -o $@ ifndef GGML_NO_LLAMAFILE -ggml/src/llamafile/sgemm.o: \ - ggml/src/llamafile/sgemm.cpp \ - ggml/src/llamafile/sgemm.h \ +ggml/src/ggml-cpu/llamafile/sgemm.o: \ + ggml/src/ggml-cpu/llamafile/sgemm.cpp \ + ggml/src/ggml-cpu/llamafile/sgemm.h \ ggml/include/ggml.h - $(CXX) $(CXXFLAGS) -c $< -o $@ + $(CXX) $(CXXFLAGS) -c $< -o $@ -I ggml/src -I ggml/src/ggml-cpu endif # GGML_NO_LLAMAFILE ifndef GGML_NO_AMX -ggml/src/ggml-amx.o: \ - ggml/src/ggml-amx.cpp \ +ggml/src/ggml-amx/ggml-amx.o: \ + ggml/src/ggml-amx/ggml-amx.cpp \ ggml/include/ggml-amx.h $(CXX) $(CXXFLAGS) -c $< -o $@ @@ -1250,10 +1266,11 @@ clean: rm -rvf ggml/*.a rm -rvf ggml/*.dll rm -rvf ggml/*.so - rm -vrf ggml/src/*.o - rm -rvf ggml/src/llamafile/*.o + rm -rvf ggml/src/*.o + rm -rvf ggml/src/ggml-cpu/*.o + rm -rvf ggml/src/ggml-cpu/llamafile/*.o rm -rvf common/build-info.cpp - rm -vrf ggml/src/ggml-metal-embed.metal + rm -vrf ggml/src/ggml-metal/ggml-metal-embed.metal rm -vrf ggml/src/ggml-cuda/*.o rm -vrf ggml/src/ggml-cuda/template-instances/*.o rm -vrf ggml/src/ggml-amx/*.o diff --git a/Package.swift b/Package.swift index 0f4f19018..eaa17cdb2 100644 --- a/Package.swift +++ b/Package.swift @@ -30,8 +30,8 @@ var cSettings: [CSetting] = [ ] #if canImport(Darwin) -sources.append("ggml/src/ggml-metal.m") -resources.append(.process("ggml/src/ggml-metal.metal")) +sources.append("ggml/src/ggml-metal/ggml-metal.m") +resources.append(.process("ggml/src/ggml-metal/ggml-metal.metal")) linkerSettings.append(.linkedFramework("Accelerate")) cSettings.append( contentsOf: [ diff --git a/ggml/src/ggml-amx/ggml-amx.cpp b/ggml/src/ggml-amx/ggml-amx.cpp index 2db0b720a..37da98539 100644 --- a/ggml/src/ggml-amx/ggml-amx.cpp +++ b/ggml/src/ggml-amx/ggml-amx.cpp @@ -421,9 +421,18 @@ ggml_backend_reg_t ggml_backend_amx_reg(void) { #else // if defined(__AMX_INT8__) +ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void) { + return nullptr; +} + +bool ggml_backend_is_amx(ggml_backend_t backend) { + GGML_UNUSED(backend); + return false; +} + ggml_backend_t ggml_backend_amx_init(void) { fprintf(stderr, "GGML is not compiled with AMX support!\n"); - return ggml_backend_t{}; + return nullptr; } void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) { diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index bf2bbdef6..4c45146a1 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -44,7 +44,7 @@ #endif #ifdef GGML_USE_LLAMAFILE -#include +#include "llamafile/sgemm.h" #endif #if defined(_MSC_VER) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index a040e4b56..aa4d2b85d 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -22,6 +22,10 @@ #include #endif +#if defined(__F16C__) +#include +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index e8b71a9f8..88f06c24e 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -15,8 +15,8 @@ using namespace metal; // ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf // // cmd: -// .../usr/bin/metal -dM -E -c ggml/src/ggml-metal.metal -// .../usr/bin/metal -dM -E -c -target air64-apple-ios14.0 ggml/src/ggml-metal.metal +// .../usr/bin/metal -dM -E -c ggml/src/ggml-metal/ggml-metal.metal +// .../usr/bin/metal -dM -E -c -target air64-apple-ios14.0 ggml/src/ggml-metal/ggml-metal.metal // #if __METAL_VERSION__ < 310 && defined(GGML_METAL_USE_BF16) #undef GGML_METAL_USE_BF16 diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 11f0803c4..4a97bfc32 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -368,7 +368,7 @@ void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) { void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) { int64_t i = 0; #if defined(__F16C__) - if (ggml_cpu_has_f16c()) { + //if (ggml_cpu_has_f16c()) { for (; i + 7 < n; i += 8) { __m256 x_vec = _mm256_loadu_ps(x + i); __m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT); @@ -379,7 +379,7 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) { __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT); _mm_storel_epi64((__m128i *)(y + i), y_vec); } - } + //} #endif for (; i < n; i++) { y[i] = GGML_FP32_TO_FP16(x[i]); @@ -389,7 +389,7 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) { void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) { int64_t i = 0; #if defined(__AVX512F__) - if (ggml_cpu_has_avx512()) { + //if (ggml_cpu_has_avx512()) { for (; i + 16 <= n; i += 16) { _mm512_storeu_ps(y + i, _mm512_castsi512_ps( @@ -399,10 +399,10 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) { (const __m256i *)(x + i))), 16))); } - } + //} #endif #if defined(__AVX2__) - if (ggml_cpu_has_avx2()) { + //if (ggml_cpu_has_avx2()) { for (; i + 8 <= n; i += 8) { _mm256_storeu_ps(y + i, _mm256_castsi256_ps( @@ -412,7 +412,7 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) { (const __m128i *)(x + i))), 16))); } - } + //} #endif for (; i < n; i++) { y[i] = GGML_BF16_TO_FP32(x[i]); diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp index 67c6964b6..2dca62848 100644 --- a/pocs/vdot/vdot.cpp +++ b/pocs/vdot/vdot.cpp @@ -237,7 +237,6 @@ int main(int argc, char** argv) { int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64); int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64); - const auto * funcs = ggml_get_type_traits(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0); const auto * funcs_cpu = ggml_get_type_traits_cpu(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0); std::vector q40;