metal : create autorelease pool during library build (#4970)
* metal : create autorelease pool during library build (ggml-ci)
* test : simplify (ggml-ci)
parent 0f83e727af
commit c918fe8dca
.gitignore | 1 +

@@ -105,3 +105,4 @@ poetry.toml
 /tests/test-tokenizer-1-bpe
 /tests/test-rope
 /tests/test-backend-ops
+/tests/test-autorelease
Makefile | 5 ++++-

@@ -9,7 +9,7 @@ TEST_TARGETS = \
 	tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
 	tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
 	tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
-	tests/test-backend-ops
+	tests/test-backend-ops tests/test-autorelease

 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report

@@ -747,3 +747,6 @@ tests/test-c.o: tests/test-c.c llama.h

 tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
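(In these link rules, `$^` expands to the full prerequisite list, `$@` to the target, and `$(filter-out %.h,$^)` drops header files from the compile line. The new test-autorelease target reuses the test-backend-ops recipe verbatim, additionally linking llama.o and $(COMMON_DEPS) because the test goes through the full llama API rather than ggml alone.)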
ci/run.sh | 2 ++

@@ -179,6 +179,8 @@ function gg_run_open_llama_3b_v2 {

     wiki_test_60="${path_wiki}/wiki.test-60.raw"

+    ./bin/test-autorelease ${model_f16}
+
     ./bin/quantize ${model_f16} ${model_q8_0} q8_0
     ./bin/quantize ${model_f16} ${model_q4_0} q4_0
     ./bin/quantize ${model_f16} ${model_q4_1} q4_1
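(The CI script runs the new binary against the unquantized f16 model before the quantize calls, so a leak or crash on thread exit surfaces early in the run rather than after the more expensive quantization and inference steps.)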
ggml-metal.m | 19 +++++++++----------

@@ -303,22 +303,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
             return NULL;
         }

-        // dictionary of preprocessor macros
-        NSMutableDictionary * prep = [NSMutableDictionary dictionary];
+        @autoreleasepool {
+            // dictionary of preprocessor macros
+            NSMutableDictionary * prep = [NSMutableDictionary dictionary];

 #ifdef GGML_QKK_64
-        prep[@"QK_K"] = @(64);
+            prep[@"QK_K"] = @(64);
 #endif

-        MTLCompileOptions* options = [MTLCompileOptions new];
-        options.preprocessorMacros = prep;
+            MTLCompileOptions* options = [MTLCompileOptions new];
+            options.preprocessorMacros = prep;

-        //[options setFastMathEnabled:false];
+            //[options setFastMathEnabled:false];

-        ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
-
-        [options release];
-        [prep release];
+            ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
+        }

         if (error) {
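For background on why this change works: Cocoa and Metal APIs such as newLibraryWithSource:options:error: routinely return autoreleased temporaries, and a secondary thread that never sets up an autorelease pool has nowhere to drain them, so they leak and can crash at thread exit (the symptom reported in issue #4952). The @autoreleasepool block scopes those temporaries to the library build. Below is a minimal standalone sketch of the same pattern; it is not code from this commit, and the build_library helper and the trivial kernel source are illustrative assumptions.

// Sketch: building a Metal library on a secondary thread inside an
// autorelease pool. Build: clang -fobjc-arc -framework Foundation -framework Metal sketch.m
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import <pthread.h>

// Hypothetical helper mirroring the shape of the ggml_metal_init change.
static void * build_library(void * arg) {
    (void) arg;
    // Without this pool, objects autoreleased by the Metal compiler calls
    // below would have no pool to drain into on this pthread and would leak.
    @autoreleasepool {
        id<MTLDevice> device = MTLCreateSystemDefaultDevice();

        MTLCompileOptions * options = [MTLCompileOptions new];
        NSError * error = nil;

        id<MTLLibrary> library = [device newLibraryWithSource:@"kernel void k() { }"
                                                      options:options
                                                        error:&error];
        if (error) {
            NSLog(@"compile error: %@", error);
        }
        (void) library; // real code stores the retained library in its context
    }
    return NULL;
}

int main(void) {
    pthread_t t;
    pthread_create(&t, NULL, build_library, NULL);
    pthread_join(&t, NULL);
    return 0;
}

Note also that the removed [options release] and [prep release] calls were manual-retain-release cleanup; with the pool in place, the temporaries are drained when the @autoreleasepool block ends.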
tests/CMakeLists.txt | 1 +

@@ -49,6 +49,7 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp)
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
 llama_build_and_test_executable(test-backend-ops.cpp)
+llama_build_and_test_executable(test-autorelease.cpp)

 llama_build_and_test_executable(test-rope.cpp)
tests/test-autorelease.cpp | 28 ++++++++++++++++++++++++++++ (new file)

@@ -0,0 +1,28 @@
+// ref: https://github.com/ggerganov/llama.cpp/issues/4952#issuecomment-1892864763
+
+#include <cstdio>
+#include <string>
+#include <thread>
+
+#include "llama.h"
+
+// This creates a new context inside a pthread and then tries to exit cleanly.
+int main(int argc, char ** argv) {
+    if (argc < 2) {
+        printf("Usage: %s model.gguf\n", argv[0]);
+        return 0; // intentionally return success
+    }
+
+    const std::string fname = argv[1];
+
+    std::thread([&fname]() {
+        llama_backend_init(false);
+        auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+        llama_free(ctx);
+        llama_free_model(model);
+        llama_backend_free();
+    }).join();
+
+    return 0;
+}
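A note on the test's design: as its own comment says, the entire load/free cycle runs inside a std::thread rather than on the main thread, since the leak this commit fixes only manifests on threads without an autorelease pool already in place; a clean join() followed by return 0 is the pass condition. The early return 0 when no model path is given is likewise intentional, so CI configurations that do not download a model still pass.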