mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-11 13:30:35 +00:00
metal : build metallib + fix embed path (#6015)
* metal : build metallib + fix embed path ggml-ci
* metal : fix embed build + update library load logic ggml-ci
* metal : fix embedded library build ggml-ci
* ci : fix iOS builds to use embedded library
This commit is contained in:
parent
0fd6c1f015
commit
381da2d9f0
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@ -333,6 +333,7 @@ jobs:
|
|||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake -G Xcode .. \
|
cmake -G Xcode .. \
|
||||||
|
-DLLAMA_METAL_EMBED_LIBRARY=ON \
|
||||||
-DLLAMA_BUILD_EXAMPLES=OFF \
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||||
-DLLAMA_BUILD_TESTS=OFF \
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
-DLLAMA_BUILD_SERVER=OFF \
|
-DLLAMA_BUILD_SERVER=OFF \
|
||||||
@ -361,6 +362,7 @@ jobs:
|
|||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake -G Xcode .. \
|
cmake -G Xcode .. \
|
||||||
|
-DLLAMA_METAL_EMBED_LIBRARY=ON \
|
||||||
-DLLAMA_BUILD_EXAMPLES=OFF \
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||||
-DLLAMA_BUILD_TESTS=OFF \
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
-DLLAMA_BUILD_SERVER=OFF \
|
-DLLAMA_BUILD_SERVER=OFF \
|
||||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -25,6 +25,8 @@
|
|||||||
.vscode/
|
.vscode/
|
||||||
.idea/
|
.idea/
|
||||||
|
|
||||||
|
ggml-metal-embed.metal
|
||||||
|
|
||||||
lcov-report/
|
lcov-report/
|
||||||
gcovr-report/
|
gcovr-report/
|
||||||
|
|
||||||
|
@ -200,9 +200,6 @@ if (LLAMA_METAL)
|
|||||||
add_compile_definitions(GGML_METAL_NDEBUG)
|
add_compile_definitions(GGML_METAL_NDEBUG)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# get full path to the file
|
|
||||||
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
|
|
||||||
|
|
||||||
# copy ggml-common.h and ggml-metal.metal to bin directory
|
# copy ggml-common.h and ggml-metal.metal to bin directory
|
||||||
configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
|
configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
|
||||||
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
|
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
|
||||||
@ -211,53 +208,62 @@ if (LLAMA_METAL)
|
|||||||
enable_language(ASM)
|
enable_language(ASM)
|
||||||
add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
|
add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
|
||||||
|
|
||||||
|
set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
|
||||||
set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
|
set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
|
||||||
|
|
||||||
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
|
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
|
||||||
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s")
|
|
||||||
|
# merge ggml-common.h and ggml-metal.metal into a single file
|
||||||
|
set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
|
||||||
|
set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
|
||||||
|
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${EMBED_METALLIB_ASSEMBLY}
|
OUTPUT ${METALLIB_EMBED_ASM}
|
||||||
COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY}
|
COMMAND echo "Embedding Metal library"
|
||||||
COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY}
|
COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
|
||||||
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY}
|
COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
|
||||||
COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY}
|
COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
|
||||||
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY}
|
COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
|
||||||
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY}
|
COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
|
||||||
DEPENDS ${METALLIB_SOURCE}
|
COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
|
||||||
|
COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
|
||||||
|
DEPENDS ggml-metal.metal ggml-common.h
|
||||||
COMMENT "Generate assembly for embedded Metal library"
|
COMMENT "Generate assembly for embedded Metal library"
|
||||||
)
|
)
|
||||||
|
|
||||||
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY})
|
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
|
||||||
endif()
|
else()
|
||||||
|
if (LLAMA_METAL_SHADER_DEBUG)
|
||||||
if (LLAMA_METAL_SHADER_DEBUG)
|
# custom command to do the following:
|
||||||
# custom command to do the following:
|
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
|
||||||
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
|
# xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
|
||||||
# xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
|
#
|
||||||
#
|
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
|
||||||
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
|
# disabling fast math is needed in order to pass tests/test-backend-ops
|
||||||
# disabling fast math is needed in order to pass tests/test-backend-ops
|
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
|
||||||
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
|
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib
|
||||||
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib
|
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
|
||||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
|
set(XC_FLAGS -fno-fast-math -fno-inline -g)
|
||||||
set(XC_FLAGS -fno-fast-math -fno-inline -g)
|
else()
|
||||||
if (LLAMA_QKK_64)
|
set(XC_FLAGS -O3)
|
||||||
set(XC_FLAGS ${XC_FLAGS} -DQK_K=64)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||||
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||||
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||||
DEPENDS ggml-metal.metal
|
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||||
|
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
|
||||||
|
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
|
||||||
|
DEPENDS ggml-metal.metal ggml-common.h
|
||||||
COMMENT "Compiling Metal kernels"
|
COMMENT "Compiling Metal kernels"
|
||||||
)
|
)
|
||||||
|
|
||||||
add_custom_target(
|
add_custom_target(
|
||||||
ggml-metal ALL
|
ggml-metal ALL
|
||||||
DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||||
)
|
)
|
||||||
endif()
|
endif() # LLAMA_METAL_EMBED_LIBRARY
|
||||||
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
|
||||||
${FOUNDATION_LIBRARY}
|
${FOUNDATION_LIBRARY}
|
||||||
|
15
Makefile
15
Makefile
@ -557,15 +557,16 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
|
|||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ifdef LLAMA_METAL_EMBED_LIBRARY
|
ifdef LLAMA_METAL_EMBED_LIBRARY
|
||||||
ggml-metal-embed.o: ggml-metal.metal
|
ggml-metal-embed.o: ggml-metal.metal ggml-common.h
|
||||||
@echo "Embedding Metal library"
|
@echo "Embedding Metal library"
|
||||||
|
@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
|
||||||
$(eval TEMP_ASSEMBLY=$(shell mktemp))
|
$(eval TEMP_ASSEMBLY=$(shell mktemp))
|
||||||
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
|
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
|
||||||
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
|
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
|
||||||
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
|
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
|
||||||
@echo ".incbin \"$<\"" >> $(TEMP_ASSEMBLY)
|
@echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
|
||||||
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
|
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
|
||||||
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
|
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
|
||||||
@$(AS) $(TEMP_ASSEMBLY) -o $@
|
@$(AS) $(TEMP_ASSEMBLY) -o $@
|
||||||
@rm -f ${TEMP_ASSEMBLY}
|
@rm -f ${TEMP_ASSEMBLY}
|
||||||
endif
|
endif
|
||||||
|
49
ggml-metal.m
49
ggml-metal.m
@ -280,6 +280,11 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
id<MTLLibrary> metal_library;
|
id<MTLLibrary> metal_library;
|
||||||
|
|
||||||
// load library
|
// load library
|
||||||
|
//
|
||||||
|
// - first check if the library is embedded
|
||||||
|
// - then check if the library is in the bundle
|
||||||
|
// - if not found, load the source and compile it
|
||||||
|
// - if that fails, return NULL
|
||||||
{
|
{
|
||||||
NSBundle * bundle = nil;
|
NSBundle * bundle = nil;
|
||||||
#ifdef SWIFT_PACKAGE
|
#ifdef SWIFT_PACKAGE
|
||||||
@ -287,12 +292,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
#else
|
#else
|
||||||
bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
|
bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
NSError * error = nil;
|
NSError * error = nil;
|
||||||
NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"];
|
|
||||||
if (libPath != nil) {
|
#if GGML_METAL_EMBED_LIBRARY
|
||||||
|
const bool try_metallib = false;
|
||||||
|
#else
|
||||||
|
const bool try_metallib = true;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
|
||||||
|
if (try_metallib && path_lib != nil) {
|
||||||
// pre-compiled library found
|
// pre-compiled library found
|
||||||
NSURL * libURL = [NSURL fileURLWithPath:libPath];
|
NSURL * libURL = [NSURL fileURLWithPath:path_lib];
|
||||||
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
|
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
|
||||||
|
|
||||||
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
|
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
|
||||||
if (error) {
|
if (error) {
|
||||||
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
||||||
@ -305,31 +319,34 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
extern const char ggml_metallib_start[];
|
extern const char ggml_metallib_start[];
|
||||||
extern const char ggml_metallib_end[];
|
extern const char ggml_metallib_end[];
|
||||||
|
|
||||||
NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
|
NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
|
||||||
#else
|
#else
|
||||||
GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
|
GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
|
||||||
|
|
||||||
NSString * sourcePath;
|
NSString * path_source;
|
||||||
NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
|
NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
|
||||||
|
|
||||||
GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, ggmlMetalPathResources ? [ggmlMetalPathResources UTF8String] : "nil");
|
GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
|
||||||
|
|
||||||
if (ggmlMetalPathResources) {
|
if (path_resource) {
|
||||||
sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"];
|
path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
|
||||||
} else {
|
} else {
|
||||||
sourcePath = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
|
path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
|
||||||
}
|
}
|
||||||
if (sourcePath == nil) {
|
|
||||||
|
if (path_source == nil) {
|
||||||
GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
|
GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
|
||||||
sourcePath = @"ggml-metal.metal";
|
path_source = @"ggml-metal.metal";
|
||||||
}
|
}
|
||||||
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [sourcePath UTF8String]);
|
|
||||||
NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error];
|
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
|
||||||
|
|
||||||
|
NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
|
||||||
if (error) {
|
if (error) {
|
||||||
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#endif
|
#endif // GGML_METAL_EMBED_LIBRARY
|
||||||
|
|
||||||
@autoreleasepool {
|
@autoreleasepool {
|
||||||
// dictionary of preprocessor macros
|
// dictionary of preprocessor macros
|
||||||
|
@ -4,9 +4,6 @@
|
|||||||
|
|
||||||
#include <metal_stdlib>
|
#include <metal_stdlib>
|
||||||
|
|
||||||
#define GGML_COMMON_IMPL_METAL
|
|
||||||
#include "ggml-common.h"
|
|
||||||
|
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||||
|
Loading…
Reference in New Issue
Block a user