Compare commits

..

1 Commits

Author SHA1 Message Date
Jesse Gross
7bc4f63f2b
Merge a2d4b6fc81 into 32d6ee6385 2024-12-24 09:26:04 +08:00
5 changed files with 52 additions and 85 deletions

View File

@ -234,7 +234,6 @@ function(ggml_add_backend_library backend)
# write the shared library to the output directory # write the shared library to the output directory
set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL) target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
add_dependencies(ggml ${backend})
else() else()
add_library(${backend} ${ARGN}) add_library(${backend} ${ARGN})
target_link_libraries(ggml PUBLIC ${backend}) target_link_libraries(ggml PUBLIC ${backend})

View File

@ -66,26 +66,6 @@
#include "ggml-kompute.h" #include "ggml-kompute.h"
#endif #endif
// disable C++17 deprecation warning for std::codecvt_utf8
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
static std::wstring utf8_to_utf16(const std::string & str) {
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
return converter.from_bytes(str);
}
static std::string utf16_to_utf8(const std::wstring & str) {
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
return converter.to_bytes(str);
}
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
#ifdef _WIN32 #ifdef _WIN32
using dl_handle = std::remove_pointer_t<HMODULE>; using dl_handle = std::remove_pointer_t<HMODULE>;
@ -108,6 +88,11 @@ static dl_handle * dl_load_library(const std::wstring & path) {
return handle; return handle;
} }
static dl_handle * dl_load_library(const std::string & path) {
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
return dl_load_library(converter.from_bytes(path));
}
static void * dl_get_sym(dl_handle * handle, const char * name) { static void * dl_get_sym(dl_handle * handle, const char * name) {
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
@ -129,8 +114,8 @@ struct dl_handle_deleter {
} }
}; };
static void * dl_load_library(const std::wstring & path) { static void * dl_load_library(const std::string & path) {
dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL); dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
return handle; return handle;
} }
@ -217,11 +202,11 @@ struct ggml_backend_registry {
devices.push_back(device); devices.push_back(device);
} }
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) { ggml_backend_reg_t load_backend(const char * path, bool silent) {
dl_handle_ptr handle { dl_load_library(path) }; dl_handle_ptr handle { dl_load_library(path) };
if (!handle) { if (!handle) {
if (!silent) { if (!silent) {
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str()); GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
} }
return nullptr; return nullptr;
} }
@ -229,7 +214,7 @@ struct ggml_backend_registry {
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (score_fn && score_fn() == 0) { if (score_fn && score_fn() == 0) {
if (!silent) { if (!silent) {
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str()); GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
} }
return nullptr; return nullptr;
} }
@ -237,7 +222,7 @@ struct ggml_backend_registry {
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init"); auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
if (!backend_init_fn) { if (!backend_init_fn) {
if (!silent) { if (!silent) {
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str()); GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
} }
return nullptr; return nullptr;
} }
@ -246,16 +231,16 @@ struct ggml_backend_registry {
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) { if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
if (!silent) { if (!silent) {
if (!reg) { if (!reg) {
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str()); GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
} else { } else {
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n", GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
__func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION); __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
} }
} }
return nullptr; return nullptr;
} }
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str()); GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
register_backend(reg, std::move(handle)); register_backend(reg, std::move(handle));
@ -391,14 +376,14 @@ ggml_backend_t ggml_backend_init_best(void) {
// Dynamic loading // Dynamic loading
ggml_backend_reg_t ggml_backend_load(const char * path) { ggml_backend_reg_t ggml_backend_load(const char * path) {
return get_reg().load_backend(utf8_to_utf16(path), false); return get_reg().load_backend(path, false);
} }
void ggml_backend_unload(ggml_backend_reg_t reg) { void ggml_backend_unload(ggml_backend_reg_t reg) {
get_reg().unload_backend(reg, true); get_reg().unload_backend(reg, true);
} }
static std::wstring get_executable_path() { static std::string get_executable_path() {
#if defined(__APPLE__) #if defined(__APPLE__)
// get executable path // get executable path
std::vector<char> path; std::vector<char> path;
@ -416,7 +401,7 @@ static std::wstring get_executable_path() {
if (last_slash != std::string::npos) { if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash); base_path = base_path.substr(0, last_slash);
} }
return utf8_to_utf16(base_path + "/"); return base_path + "/";
#elif defined(__linux__) || defined(__FreeBSD__) #elif defined(__linux__) || defined(__FreeBSD__)
std::string base_path = "."; std::string base_path = ".";
std::vector<char> path(1024); std::vector<char> path(1024);
@ -442,63 +427,57 @@ static std::wstring get_executable_path() {
path.resize(path.size() * 2); path.resize(path.size() * 2);
} }
return utf8_to_utf16(base_path + "/"); return base_path + "/";
#elif defined(_WIN32) #elif defined(_WIN32)
std::vector<wchar_t> path(MAX_PATH); std::vector<char> path(MAX_PATH);
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size()); DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
if (len == 0) { if (len == 0) {
return {}; return "";
} }
std::wstring base_path(path.data(), len); std::string base_path(path.data(), len);
// remove executable name // remove executable name
auto last_slash = base_path.find_last_of('\\'); auto last_slash = base_path.find_last_of('\\');
if (last_slash != std::string::npos) { if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash); base_path = base_path.substr(0, last_slash);
} }
return base_path + L"\\"; return base_path + "\\";
#else
return {};
#endif #endif
} }
static std::wstring backend_filename_prefix() { static std::string backend_filename_prefix() {
#ifdef _WIN32 #ifdef _WIN32
return L"ggml-"; return "ggml-";
#else #else
return L"libggml-"; return "libggml-";
#endif #endif
} }
static std::wstring backend_filename_suffix() { static std::string backend_filename_suffix() {
#ifdef _WIN32 #ifdef _WIN32
return L".dll"; return ".dll";
#else #else
return L".so"; return ".so";
#endif
}
static std::wstring path_separator() {
#ifdef _WIN32
return L"\\";
#else
return L"/";
#endif #endif
} }
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
// TODO: search system paths // TODO: search system paths
std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-"; std::string file_prefix = backend_filename_prefix() + name + "-";
std::vector<std::wstring> search_paths; std::vector<std::string> search_paths;
if (user_search_path == nullptr) { if (user_search_path == nullptr) {
search_paths.push_back(L"." + path_separator()); search_paths.push_back("./");
search_paths.push_back(get_executable_path()); search_paths.push_back(get_executable_path());
} else { } else {
search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator()); #if defined(_WIN32)
search_paths.push_back(std::string(user_search_path) + "\\");
#else
search_paths.push_back(std::string(user_search_path) + "/");
#endif
} }
int best_score = 0; int best_score = 0;
std::wstring best_path; std::string best_path;
namespace fs = std::filesystem; namespace fs = std::filesystem;
for (const auto & search_path : search_paths) { for (const auto & search_path : search_paths) {
@ -508,27 +487,27 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied); fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
for (const auto & entry : dir_it) { for (const auto & entry : dir_it) {
if (entry.is_regular_file()) { if (entry.is_regular_file()) {
std::wstring filename = entry.path().filename().wstring(); std::string filename = entry.path().filename().string();
std::wstring ext = entry.path().extension().wstring(); std::string ext = entry.path().extension().string();
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) { if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) }; dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
if (!handle && !silent) { if (!handle && !silent) {
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str()); GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
} }
if (handle) { if (handle) {
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (score_fn) { if (score_fn) {
int s = score_fn(); int s = score_fn();
#ifndef NDEBUG #ifndef NDEBUG
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s); GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
#endif #endif
if (s > best_score) { if (s > best_score) {
best_score = s; best_score = s;
best_path = entry.path().wstring(); best_path = entry.path().string();
} }
} else { } else {
if (!silent) { if (!silent) {
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str()); GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
} }
} }
} }
@ -540,15 +519,15 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
if (best_score == 0) { if (best_score == 0) {
// try to load the base backend // try to load the base backend
for (const auto & search_path : search_paths) { for (const auto & search_path : search_paths) {
std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix(); std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
if (fs::exists(path)) { if (fs::exists(path)) {
return get_reg().load_backend(path, silent); return get_reg().load_backend(path.c_str(), silent);
} }
} }
return nullptr; return nullptr;
} }
return get_reg().load_backend(best_path, silent); return get_reg().load_backend(best_path.c_str(), silent);
} }
void ggml_backend_load_all() { void ggml_backend_load_all() {

View File

@ -135,20 +135,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif() endif()
# show enabled features # show enabled features
if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
set(FEAT_INPUT_FILE "NUL")
else()
set(FEAT_INPUT_FILE "/dev/null")
endif()
execute_process( execute_process(
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E - COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
INPUT_FILE ${FEAT_INPUT_FILE} INPUT_FILE "/dev/null"
OUTPUT_VARIABLE ARM_FEATURE OUTPUT_VARIABLE ARM_FEATURE
RESULT_VARIABLE ARM_FEATURE_RESULT RESULT_VARIABLE ARM_FEATURE_RESULT
) )
if (ARM_FEATURE_RESULT) if (ARM_FEATURE_RESULT)
message(WARNING "Failed to get ARM features") message(FATAL_ERROR "Failed to get ARM features")
else() else()
foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC) foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos) string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
@ -323,11 +317,6 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS}) target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
if (GGML_BACKEND_DL) if (GGML_BACKEND_DL)
if (GGML_NATIVE)
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
endif()
# The feature detection code is compiled as a separate target so that # The feature detection code is compiled as a separate target so that
# it can be built without the architecture flags # it can be built without the architecture flags
# Since multiple variants of the CPU backend may be included in the same # Since multiple variants of the CPU backend may be included in the same

View File

@ -1657,7 +1657,7 @@ bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token t
} }
llama_token llama_token_bos_impl(const struct llama_vocab & vocab) { llama_token llama_token_bos_impl(const struct llama_vocab & vocab) {
return vocab.type != LLAMA_VOCAB_TYPE_WPM ? vocab.special_bos_id : vocab.special_cls_id; return vocab.special_bos_id;
} }
llama_token llama_token_eos_impl(const struct llama_vocab & vocab) { llama_token llama_token_eos_impl(const struct llama_vocab & vocab) {

View File

@ -45,7 +45,7 @@ struct llama_vocab {
id special_unk_id = 0; id special_unk_id = 0;
id special_sep_id = LLAMA_TOKEN_NULL; id special_sep_id = LLAMA_TOKEN_NULL;
id special_pad_id = LLAMA_TOKEN_NULL; id special_pad_id = LLAMA_TOKEN_NULL;
id special_cls_id = LLAMA_TOKEN_NULL; // TODO: revisit if this is really needed https://github.com/ggerganov/llama.cpp/pull/10930 id special_cls_id = LLAMA_TOKEN_NULL;
id special_mask_id = LLAMA_TOKEN_NULL; id special_mask_id = LLAMA_TOKEN_NULL;
id linefeed_id = 13; id linefeed_id = 13;