From c918fe8dca8fa1c4602427e0a4b88e20046f6c34 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 17 Jan 2024 18:38:39 +0200
Subject: [PATCH] metal : create autorelease pool during library build (#4970)

* metal : create autorelease pool during library build

ggml-ci

* test : simplify

ggml-ci
---
 .gitignore                 |  1 +
 Makefile                   |  5 ++++-
 ci/run.sh                  |  2 ++
 ggml-metal.m               | 19 +++++++++----------
 tests/CMakeLists.txt       |  1 +
 tests/test-autorelease.cpp | 28 ++++++++++++++++++++++++++++
 6 files changed, 45 insertions(+), 11 deletions(-)
 create mode 100644 tests/test-autorelease.cpp

diff --git a/.gitignore b/.gitignore
index fba207045..5ab81445d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,3 +105,4 @@ poetry.toml
 /tests/test-tokenizer-1-bpe
 /tests/test-rope
 /tests/test-backend-ops
+/tests/test-autorelease
diff --git a/Makefile b/Makefile
index 995b89f7a..a8658a596 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ TEST_TARGETS = \
 	tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
 	tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
 	tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
-	tests/test-backend-ops
+	tests/test-backend-ops tests/test-autorelease
 
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -747,3 +747,6 @@ tests/test-c.o: tests/test-c.c llama.h
 
 tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
diff --git a/ci/run.sh b/ci/run.sh
index 47a254f4c..86293f0db 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -179,6 +179,8 @@ function gg_run_open_llama_3b_v2 {
 
     wiki_test_60="${path_wiki}/wiki.test-60.raw"
 
+    ./bin/test-autorelease ${model_f16}
+
     ./bin/quantize ${model_f16} ${model_q8_0} q8_0
     ./bin/quantize ${model_f16} ${model_q4_0} q4_0
     ./bin/quantize ${model_f16} ${model_q4_1} q4_1
diff --git a/ggml-metal.m b/ggml-metal.m
index 8bb4edd64..66d4d675e 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -303,22 +303,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
             return NULL;
         }
 
-        // dictionary of preprocessor macros
-        NSMutableDictionary * prep = [NSMutableDictionary dictionary];
+        @autoreleasepool {
+            // dictionary of preprocessor macros
+            NSMutableDictionary * prep = [NSMutableDictionary dictionary];
 
 #ifdef GGML_QKK_64
-        prep[@"QK_K"] = @(64);
+            prep[@"QK_K"] = @(64);
#endif
 
-        MTLCompileOptions* options = [MTLCompileOptions new];
-        options.preprocessorMacros = prep;
+            MTLCompileOptions* options = [MTLCompileOptions new];
+            options.preprocessorMacros = prep;
 
-        //[options setFastMathEnabled:false];
+            //[options setFastMathEnabled:false];
 
-        ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
-
-        [options release];
-        [prep release];
+            ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
+        }
     }
 
     if (error) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 7c932240d..d7aaab843 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -49,6 +49,7 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp)
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
 llama_build_and_test_executable(test-backend-ops.cpp)
+llama_build_and_test_executable(test-autorelease.cpp)
 
 llama_build_and_test_executable(test-rope.cpp)
 
diff --git a/tests/test-autorelease.cpp b/tests/test-autorelease.cpp
new file mode 100644
index 000000000..289c6ba6c
--- /dev/null
+++ b/tests/test-autorelease.cpp
@@ -0,0 +1,28 @@
+// ref: https://github.com/ggerganov/llama.cpp/issues/4952#issuecomment-1892864763
+
+#include <cstdio>
+#include <string>
+#include <thread>
+
+#include "llama.h"
+
+// This creates a new context inside a pthread and then tries to exit cleanly.
+int main(int argc, char ** argv) {
+    if (argc < 2) {
+        printf("Usage: %s model.gguf\n", argv[0]);
+        return 0; // intentionally return success
+    }
+
+    const std::string fname = argv[1];
+
+    std::thread([&fname]() {
+        llama_backend_init(false);
+        auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+        llama_free(ctx);
+        llama_free_model(model);
+        llama_backend_free();
+    }).join();
+
+    return 0;
+}
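
Note (not part of the patch): the @autoreleasepool block matters because ggml_metal_init may run on a thread that never installed an autorelease pool of its own; the new test spawns a plain std::thread for exactly this reason. Below is a minimal Objective-C++ sketch of the same pattern, for illustration only. The file name, function name, and build command are assumptions, not anything from llama.cpp.

// sketch.mm (illustrative): do Objective-C work on a worker thread that has no
// autorelease pool of its own, wrapping it in an explicit @autoreleasepool so
// that autoreleased temporaries are drained before the thread exits.
// Assumed build command: clang++ -std=c++11 -x objective-c++ sketch.mm -framework Foundation
#import <Foundation/Foundation.h>
#include <thread>

static void build_something(void) {
    @autoreleasepool {
        // autoreleased temporaries (dictionaries, strings, diagnostics, ...)
        // created inside this block are released when the block exits
        NSMutableDictionary * prep = [NSMutableDictionary dictionary]; // autoreleased
        prep[@"QK_K"] = @(64);
        NSString * desc = [NSString stringWithFormat:@"macros: %@", prep]; // autoreleased
        NSLog(@"%@", desc);
    } // pool drained here, before the thread ends
}

int main(void) {
    // mirrors tests/test-autorelease.cpp: a plain std::thread provides no
    // implicit autorelease pool, so the callee must create its own
    std::thread(build_something).join();
    return 0;
}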