mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-28 12:24:35 +00:00
make : add parallel to build + fix static functions in llama.cpp
This commit is contained in:
parent
7e2b9974d1
commit
25bd254089
1
.gitignore
vendored
1
.gitignore
vendored
@ -52,6 +52,7 @@ models-mnt
|
|||||||
/server
|
/server
|
||||||
/simple
|
/simple
|
||||||
/speculative
|
/speculative
|
||||||
|
/parallel
|
||||||
/train-text-from-scratch
|
/train-text-from-scratch
|
||||||
/vdot
|
/vdot
|
||||||
build-info.h
|
build-info.h
|
||||||
|
5
Makefile
5
Makefile
@ -1,5 +1,5 @@
|
|||||||
# Define the default target now so that it is always the first target
|
# Define the default target now so that it is always the first target
|
||||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative tests/test-c.o
|
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative parallel tests/test-c.o
|
||||||
|
|
||||||
# Binaries only useful for tests
|
# Binaries only useful for tests
|
||||||
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
|
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
|
||||||
@ -563,6 +563,9 @@ beam-search: examples/beam-search/beam-search.cpp build-info.h ggml.o llama.o co
|
|||||||
speculative: examples/speculative/speculative.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
|
speculative: examples/speculative/speculative.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
parallel: examples/parallel/parallel.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
ifdef LLAMA_METAL
|
ifdef LLAMA_METAL
|
||||||
metal: examples/metal/metal.cpp ggml.o $(OBJS)
|
metal: examples/metal/metal.cpp ggml.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||||
|
10
llama.cpp
10
llama.cpp
@ -1318,7 +1318,7 @@ static bool llama_kv_cache_find_slot(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// find how many cells are currently in use
|
// find how many cells are currently in use
|
||||||
int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
|
static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
|
||||||
for (uint32_t i = cache.size - 2; i > 0; --i) {
|
for (uint32_t i = cache.size - 2; i > 0; --i) {
|
||||||
if (cache.cells[i].pos >= 0 && !cache.cells[i].seq_id.empty()) {
|
if (cache.cells[i].pos >= 0 && !cache.cells[i].seq_id.empty()) {
|
||||||
return i + 1;
|
return i + 1;
|
||||||
@ -1328,7 +1328,7 @@ int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void llama_kv_cache_rm_tokens(struct llama_kv_cache & cache, int32_t c0, int32_t c1) {
|
static void llama_kv_cache_rm_tokens(struct llama_kv_cache & cache, int32_t c0, int32_t c1) {
|
||||||
if (c0 < 0) c0 = 0;
|
if (c0 < 0) c0 = 0;
|
||||||
if (c1 < 0) c1 = cache.size;
|
if (c1 < 0) c1 = cache.size;
|
||||||
|
|
||||||
@ -1338,7 +1338,7 @@ void llama_kv_cache_rm_tokens(struct llama_kv_cache & cache, int32_t c0, int32_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void llama_kv_cache_rm_seq(
|
static void llama_kv_cache_rm_seq(
|
||||||
struct llama_kv_cache & cache,
|
struct llama_kv_cache & cache,
|
||||||
llama_seq_id seq_id,
|
llama_seq_id seq_id,
|
||||||
llama_pos p0,
|
llama_pos p0,
|
||||||
@ -1353,7 +1353,7 @@ void llama_kv_cache_rm_seq(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void llama_kv_cache_keep_seq(struct llama_kv_cache & cache, llama_seq_id seq_id) {
|
static void llama_kv_cache_keep_seq(struct llama_kv_cache & cache, llama_seq_id seq_id) {
|
||||||
for (uint32_t i = 0; i < cache.size; ++i) {
|
for (uint32_t i = 0; i < cache.size; ++i) {
|
||||||
if (!cache.cells[i].has_seq_id(seq_id)) {
|
if (!cache.cells[i].has_seq_id(seq_id)) {
|
||||||
cache.cells[i].pos = -1;
|
cache.cells[i].pos = -1;
|
||||||
@ -1362,7 +1362,7 @@ void llama_kv_cache_keep_seq(struct llama_kv_cache & cache, llama_seq_id seq_id)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void llama_kv_cache_shift_seq(
|
static void llama_kv_cache_shift_seq(
|
||||||
struct llama_kv_cache & cache,
|
struct llama_kv_cache & cache,
|
||||||
llama_seq_id seq_id,
|
llama_seq_id seq_id,
|
||||||
llama_pos p0,
|
llama_pos p0,
|
||||||
|
Loading…
Reference in New Issue
Block a user