refactor cmake build

use MODULE target type for dl backend
set backend output directory to the runtime directory
ggml_backend_load_all searches backends in the system path first, then in the executable directory

ggml-ci
This commit is contained in:
slaren 2024-11-24 23:22:16 +01:00
parent 402a0e94dc
commit bd9f7b4297
15 changed files with 221 additions and 159 deletions

View File

@ -202,8 +202,8 @@ endif()
# ggml
if (GGML_BACKEND_DL)
add_compile_definitions(GGML_BACKEND_DL)
if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
endif()
add_library(ggml-base
@ -234,6 +234,27 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_libraries(ggml PRIVATE dl)
endif()
# Create the library target for one ggml backend and apply the settings shared by all backends.
#
# Arguments:
#   backend - name of the target to create (e.g. ggml-cpu, ggml-cuda)
#   ARGN    - source and header files, forwarded unchanged to add_library()
#
# When GGML_BACKEND_DL is enabled the backend is built as a MODULE library and is not linked
# into ggml. Otherwise it is a regular library that is linked PUBLIC into ggml and installed.
function(ggml_add_backend_library backend)
    if (GGML_BACKEND_DL)
        add_library(${backend} MODULE ${ARGN})
        # write the shared library to the output directory
        # only override the location when a runtime output directory is configured: an empty
        # expansion would leave the property without a value and abort the configure step
        if (NOT "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" STREQUAL "")
            set_target_properties(${backend} PROPERTIES
                LIBRARY_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        endif()
        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
    else()
        add_library(${backend} ${ARGN})
        target_link_libraries(ggml PUBLIC ${backend})
        install(TARGETS ${backend} LIBRARY)
    endif()

    target_link_libraries(${backend} PRIVATE ggml-base)
    # relative include paths are resolved against the current source directory, so ".." is the
    # parent of the directory whose CMakeLists.txt calls this function
    target_include_directories(${backend} PRIVATE ..)

    # NOTE(review): these definitions presumably drive the export/import decoration in the ggml
    # headers - confirm against the GGML_BACKEND_API definition
    if (BUILD_SHARED_LIBS)
        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
        target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
    endif()
endfunction()
function(ggml_add_backend backend)
string(TOUPPER "GGML_${backend}" backend_id)
if (${backend_id})
@ -244,18 +265,7 @@ function(ggml_add_backend backend)
# however, currently it is necessary for AMX, since it is enabled by default on llama.cpp
if (${backend_id})
message(STATUS "Including ${backend} backend")
if (${BUILD_SHARED_LIBS})
target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
target_compile_definitions(${backend_target} PUBLIC GGML_BACKEND_SHARED)
if (GGML_BACKEND_DL)
target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_DL)
endif()
endif()
if (GGML_BACKEND_DL)
install(TARGETS ${backend_target} RUNTIME)
else()
install(TARGETS ${backend_target} LIBRARY)
target_link_libraries(ggml PUBLIC ${backend_target})
if (NOT GGML_BACKEND_DL)
string(TOUPPER "GGML_USE_${backend}" backend_use)
target_compile_definitions(ggml PUBLIC ${backend_use})
endif()
@ -271,10 +281,10 @@ ggml_add_backend(CUDA)
ggml_add_backend(HIP)
ggml_add_backend(Kompute)
ggml_add_backend(METAL)
ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(MUSA)
foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)

View File

@ -9,12 +9,10 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MA
file(GLOB GGML_SOURCES_AMX "*.cpp")
add_library(ggml-amx
ggml_add_backend_library(ggml-amx
${GGML_HEADERS_AMX}
${GGML_SOURCES_AMX})
target_link_libraries(ggml-amx PRIVATE ggml-base)
target_include_directories(ggml-amx PRIVATE . ..)
${GGML_SOURCES_AMX}
)
# this is duplicated from the CPU backend, since the AMX backend also depends on the architecture flags
# TODO: integrate AMX backend into the CPU backend

View File

@ -63,20 +63,20 @@ extern "C" {
enum ggml_backend_buffer_usage usage;
};
ggml_backend_buffer_t ggml_backend_buffer_init(
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
ggml_backend_buffer_type_t buft,
struct ggml_backend_buffer_i iface,
void * context,
size_t size);
// do not use directly, use ggml_backend_tensor_copy instead
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
// multi-buffer
// buffer that contains a collection of buffers
ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
//
// Backend (stream)
@ -205,10 +205,12 @@ extern "C" {
};
// Internal backend registry API
void ggml_backend_register(ggml_backend_reg_t reg);
void ggml_backend_device_register(ggml_backend_dev_t device);
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
// Add backend dynamic loading support to the backend
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
#ifdef GGML_BACKEND_DL
#ifdef __cplusplus
# define GGML_BACKEND_DL_IMPL(reg_fn) \

View File

@ -3,6 +3,7 @@
#include "ggml-impl.h"
#include <algorithm>
#include <cstring>
#include <string>
#include <vector>
#ifdef _WIN32
@ -11,11 +12,14 @@
# define NOMINMAX
# endif
# include <windows.h>
#elif defined(__APPLE__)
# include <mach-o/dyld.h>
# include <dlfcn.h>
#else
# include <dlfcn.h>
# include <unistd.h>
#endif
// Backend registry
#ifdef GGML_USE_CPU
#include "ggml-cpu.h"
@ -128,10 +132,59 @@ struct ggml_backend_registry {
devices.push_back(device);
}
void unload_backend(ggml_backend_reg_t reg, bool silent) {
ggml_backend_reg_t load_backend(const char * path, bool silent) {
#ifdef _WIN32
HMODULE handle = LoadLibraryA(path);
if (!handle) {
if (!silent) {
GGML_LOG_INFO("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
}
return nullptr;
}
ggml_backend_init_t backend_init = (ggml_backend_init_t) GetProcAddress(handle, "ggml_backend_init");
if (!backend_init) {
if (!silent) {
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
}
FreeLibrary(handle);
return nullptr;
}
#else
void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
if (!handle) {
if (!silent) {
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
}
return nullptr;
}
auto * backend_init = (ggml_backend_init_t) dlsym(handle, "ggml_backend_init");
if (!backend_init) {
if (!silent) {
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %s\n", __func__, path, dlerror());
}
dlclose(handle);
return nullptr;
}
#endif
ggml_backend_reg_t reg = backend_init();
if (!reg) {
if (!silent) {
GGML_LOG_ERROR("%s: failed to initialize backend from %s\n", __func__, path);
}
#ifdef _WIN32
FreeLibrary(handle);
#else
dlclose(handle);
#endif
return nullptr;
}
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
register_backend(reg, handle);
return reg;
}
void unload_backend(ggml_backend_reg_t reg, bool silent) {
auto it = std::find_if(backends.begin(), backends.end(),
[reg](ggml_backend_reg_entry entry) { return entry.reg == reg; });
@ -258,48 +311,9 @@ ggml_backend_t ggml_backend_init_best(void) {
return ggml_backend_dev_init(dev, nullptr);
}
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
// Dynamic loading
ggml_backend_reg_t ggml_backend_load(const char * path) {
#ifdef _WIN32
HMODULE handle = LoadLibraryA(path);
if (!handle) {
GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
return nullptr;
}
ggml_backend_init_t backend_init = (ggml_backend_init_t) GetProcAddress(handle, "ggml_backend_init");
if (!backend_init) {
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
FreeLibrary(handle);
return nullptr;
}
#else
void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
if (!handle) {
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
return nullptr;
}
auto * backend_init = (ggml_backend_init_t) dlsym(handle, "ggml_backend_init");
if (!backend_init) {
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %s\n", __func__, path, dlerror());
dlclose(handle);
return nullptr;
}
#endif
ggml_backend_reg_t reg = backend_init();
if (!reg) {
GGML_LOG_ERROR("%s: failed to initialize backend from %s\n", __func__, path);
#ifdef _WIN32
FreeLibrary(handle);
#else
dlclose(handle);
#endif
return nullptr;
}
GGML_LOG_DEBUG("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
get_reg().register_backend(reg, handle);
return reg;
return get_reg().load_backend(path, false);
}
void ggml_backend_unload(ggml_backend_reg_t reg) {
@ -307,26 +321,82 @@ void ggml_backend_unload(ggml_backend_reg_t reg) {
}
void ggml_backend_load_all() {
#ifdef _WIN32
#define GGML_BACKEND_PATH(backend) "ggml-" backend ".dll"
#elif defined(__APPLE__)
// path is hardcoded to the cmake build directory for now
// FIXME: should also search default system paths
#define GGML_BACKEND_PATH(backend) "build/ggml/src/ggml-" backend "/libggml-" backend ".dylib"
#else
#define GGML_BACKEND_PATH(backend) "build/ggml/src/ggml-" backend "/libggml-" backend ".so"
std::vector<std::string> search_prefix;
// add the executable directory to the search path
// FIXME: this is convenient for development, but it should probably be disabled in production
#if defined(__APPLE__)
// get executable path
std::vector<char> path;
uint32_t size;
while (true) {
size = path.size();
if (_NSGetExecutablePath(path.data(), &size) == 0) {
break;
}
path.resize(size);
}
std::string base_path(path.data(), size);
// remove executable name
auto last_slash = base_path.find_last_of('/');
if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash);
}
search_prefix.push_back(base_path + "/");
#elif defined(__linux__)
std::string base_path = ".";
std::vector<char> path(1024);
while (true) {
// get executable path
ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
if (len == -1) {
break;
}
if (len < (ssize_t) path.size()) {
base_path = std::string(path.data(), len);
// remove executable name
auto last_slash = base_path.find_last_of('/');
if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash);
}
break;
}
path.resize(path.size() * 2);
}
search_prefix.push_back(base_path + "/");
#endif
ggml_backend_load(GGML_BACKEND_PATH("amx"));
ggml_backend_load(GGML_BACKEND_PATH("blas"));
ggml_backend_load(GGML_BACKEND_PATH("cann"));
ggml_backend_load(GGML_BACKEND_PATH("cuda"));
ggml_backend_load(GGML_BACKEND_PATH("hip"));
ggml_backend_load(GGML_BACKEND_PATH("kompute"));
ggml_backend_load(GGML_BACKEND_PATH("metal"));
ggml_backend_load(GGML_BACKEND_PATH("rpc"));
ggml_backend_load(GGML_BACKEND_PATH("sycl"));
ggml_backend_load(GGML_BACKEND_PATH("vulkan"));
ggml_backend_load(GGML_BACKEND_PATH("musa"));
ggml_backend_load(GGML_BACKEND_PATH("cpu"));
auto & reg = get_reg();
auto try_load = [&](const std::string & name) {
std::string os_name;
#ifdef _WIN32
os_name = "ggml-" + name + ".dll";
#else
os_name = "libggml-" + name + ".so";
#endif
if (reg.load_backend(os_name.c_str(), true)) {
return;
}
for (const auto & prefix : search_prefix) {
if (reg.load_backend((prefix + os_name).c_str(), true)) {
return;
}
}
};
try_load("amx");
try_load("blas");
try_load("cann");
try_load("cuda");
try_load("hip");
try_load("kompute");
try_load("metal");
try_load("rpc");
try_load("sycl");
try_load("vulkan");
try_load("musa");
try_load("cpu");
}

View File

@ -11,13 +11,10 @@ find_package(BLAS)
if (BLAS_FOUND)
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
add_library(ggml-blas
ggml_add_backend_library(ggml-blas
ggml-blas.cpp
)
target_link_libraries(ggml-blas PRIVATE ggml-base)
target_include_directories(ggml-blas PRIVATE . ..)
if (${GGML_BLAS_VENDOR} MATCHES "Apple")
add_compile_definitions(ACCELERATE_NEW_LAPACK)
add_compile_definitions(ACCELERATE_LAPACK_ILP64)

View File

@ -61,9 +61,9 @@ if (CANN_INSTALL_DIR)
file(GLOB GGML_SOURCES_CANN "*.cpp")
add_library(ggml-cann ${GGML_SOURCES_CANN})
target_link_libraries(ggml-cann PRIVATE ggml-base ${CANN_LIBRARIES})
target_include_directories(ggml-cann PRIVATE . .. ${CANN_INCLUDE_DIRS})
ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)
target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")

View File

@ -1,4 +1,4 @@
add_library(ggml-cpu
ggml_add_backend_library(ggml-cpu
ggml-cpu.c
ggml-cpu.cpp
ggml-cpu-aarch64.c
@ -7,8 +7,7 @@ add_library(ggml-cpu
ggml-cpu-quants.h
)
target_link_libraries(ggml-cpu PRIVATE ggml-base)
target_include_directories(ggml-cpu PRIVATE . ..)
target_include_directories(ggml-cpu PRIVATE .)
if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)

View File

@ -46,14 +46,11 @@ if (CUDAToolkit_FOUND)
list(APPEND GGML_SOURCES_CUDA ${SRCS})
endif()
add_library(ggml-cuda
ggml_add_backend_library(ggml-cuda
${GGML_HEADERS_CUDA}
${GGML_SOURCES_CUDA}
)
target_link_libraries(ggml-cuda PRIVATE ggml-base)
target_include_directories(ggml-cuda PRIVATE . ..)
add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
if (GGML_CUDA_GRAPHS)

View File

@ -64,12 +64,10 @@ else()
list(APPEND GGML_SOURCES_ROCM ${SRCS})
endif()
add_library(ggml-hip
ggml_add_backend_library(ggml-hip
${GGML_HEADERS_ROCM}
${GGML_SOURCES_ROCM})
target_link_libraries(ggml-hip PRIVATE ggml-base)
target_include_directories(ggml-hip PRIVATE . ..)
${GGML_SOURCES_ROCM}
)
# TODO: do not use CUDA definitions for HIP
target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)

View File

@ -6,13 +6,13 @@ if (NOT glslc_executable)
message(FATAL_ERROR "glslc not found")
endif()
add_library(ggml-kompute
ggml_add_backend_library(ggml-kompute
ggml-kompute.cpp
../../include/ggml-kompute.h
)
target_link_libraries(ggml-kompute PRIVATE ggml-base kompute)
target_include_directories(ggml-kompute PRIVATE . .. ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(ggml-kompute PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)

View File

@ -4,19 +4,16 @@ find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
message(STATUS "Metal framework found")
add_library(ggml-metal
ggml_add_backend_library(ggml-metal
ggml-metal.m
)
target_link_libraries(ggml-metal PRIVATE
ggml-base
${FOUNDATION_LIBRARY}
${METAL_FRAMEWORK}
${METALKIT_FRAMEWORK}
)
target_include_directories(ggml-metal PRIVATE . ..)
if (GGML_METAL_NDEBUG)
add_compile_definitions(GGML_METAL_NDEBUG)
endif()

View File

@ -47,12 +47,10 @@ if (MUSAToolkit_FOUND)
set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
endforeach()
add_library(ggml-musa
ggml_add_backend_library(ggml-musa
${GGML_HEADERS_MUSA}
${GGML_SOURCES_MUSA})
target_link_libraries(ggml-musa PRIVATE ggml-base)
target_include_directories(ggml-musa PRIVATE . ..)
${GGML_SOURCES_MUSA}
)
# TODO: do not use CUDA definitions for MUSA
target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)

View File

@ -1,10 +1,8 @@
message(STATUS "Using RPC backend")
add_library(ggml-rpc
ggml-rpc.cpp)
target_link_libraries(ggml-rpc PRIVATE ggml-base)
target_include_directories(ggml-rpc PRIVATE . ..)
ggml_add_backend_library(ggml-rpc
ggml-rpc.cpp
)
if (WIN32)
target_link_libraries(ggml-rpc PRIVATE ws2_32)

View File

@ -16,12 +16,10 @@ endif()
message(STATUS "SYCL found")
#todo: AOT
add_library(ggml-sycl
ggml_add_backend_library(ggml-sycl
ggml-sycl.cpp
../../include/ggml-sycl.h)
target_link_libraries(ggml-sycl PRIVATE ggml-base)
target_include_directories(ggml-sycl PRIVATE . ..)
../../include/ggml-sycl.h
)
if (GGML_SYCL_F16)
if (GGML_SYCL_TARGET STREQUAL "AMD")

View File

@ -3,13 +3,13 @@ find_package(Vulkan COMPONENTS glslc REQUIRED)
if (Vulkan_FOUND)
message(STATUS "Vulkan found")
add_library(ggml-vulkan
ggml_add_backend_library(ggml-vulkan
ggml-vulkan.cpp
../../include/ggml-vulkan.h
)
target_link_libraries(ggml-vulkan PRIVATE ggml-base Vulkan::Vulkan)
target_include_directories(ggml-vulkan PRIVATE . .. ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
# Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
# Possibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector