diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c5a3e09e..bb63ef98e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -497,9 +497,11 @@ else()
 endif()
 
 #
-# Build libraries
+# libraries
 #
 
+# ggml
+
 add_library(ggml OBJECT
             ggml.c
             ggml.h
@@ -524,6 +526,8 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS ggml_shared LIBRARY)
 endif()
 
+# llama
+
 add_library(llama
             llama.cpp
             llama.h
@@ -545,6 +549,10 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS llama LIBRARY)
 endif()
 
+#
+# install
+#
+
 include(GNUInstallDirs)
 install(
     FILES convert.py
@@ -583,6 +591,8 @@ endif()
 # programs, examples and tests
 #
 
+add_subdirectory(common)
+
 if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)
diff --git a/Makefile b/Makefile
index e212c63ca..d31acc450 100644
--- a/Makefile
+++ b/Makefile
@@ -45,8 +45,8 @@ OPT = -Ofast
 else
 OPT = -O3
 endif
-CFLAGS   = -I.              $(OPT) -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
+CFLAGS   = -I.            $(OPT) -std=c11   -fPIC
+CXXFLAGS = -I. -I./common $(OPT) -std=c++11 -fPIC
 LDFLAGS  =
 
 ifdef LLAMA_DEBUG
@@ -332,13 +332,13 @@ OBJS += ggml-alloc.o
 llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-common.o: examples/common.cpp examples/common.h
+common.o: common/common.cpp common/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-console.o: examples/console.cpp examples/console.h
+console.o: common/console.cpp common/console.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
+grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 libllama.so: llama.o ggml.o $(OBJS)
@@ -388,7 +388,7 @@ embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-te
 gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
+train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp build-info.h ggml.o llama.o $(OBJS)
@@ -421,7 +421,7 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-grammar-parser: tests/test-grammar-parser.cpp examples/grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
new file mode 100644
index 000000000..dead56118
--- /dev/null
+++ b/common/CMakeLists.txt
@@ -0,0 +1,20 @@
+# common
+
+set(TARGET common)
+
+add_library(${TARGET} OBJECT
+    common.h
+    common.cpp
+    console.h
+    console.cpp
+    grammar-parser.h
+    grammar-parser.cpp
+    )
+
+if (BUILD_SHARED_LIBS)
+    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif()
+
+target_include_directories(${TARGET} PUBLIC .)
+target_compile_features(${TARGET} PUBLIC cxx_std_11)
+target_link_libraries(${TARGET} PRIVATE llama)
diff --git a/examples/common.cpp b/common/common.cpp
similarity index 93%
rename from examples/common.cpp
rename to common/common.cpp
index ea6c9d499..8ea7bdda0 100644
--- a/examples/common.cpp
+++ b/common/common.cpp
@@ -636,6 +636,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
     return "The";
 }
 
+//
+// Model utils
+//
+
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
 
@@ -689,3 +693,71 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
 
     return std::make_tuple(model, lctx);
 }
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool add_bos) {
+    // upper limit for the number of tokens
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool add_bos) {
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
diff --git a/examples/common.h b/common/common.h
similarity index 90%
rename from examples/common.h
rename to common/common.h
index de04b4c5b..50145c932 100644
--- a/examples/common.h
+++ b/common/common.h
@@ -2,7 +2,6 @@
 
 #pragma once
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include <string>
@@ -105,3 +104,25 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 
 std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool add_bos);
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool add_bos);
+
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+        llama_token token);
+
+std::string llama_token_to_str_bpe(
+        const struct llama_context * ctx,
+        llama_token token);
diff --git a/examples/console.cpp b/common/console.cpp
similarity index 100%
rename from examples/console.cpp
rename to common/console.cpp
diff --git a/examples/console.h b/common/console.h
similarity index 100%
rename from examples/console.h
rename to common/console.h
diff --git a/examples/grammar-parser.cpp b/common/grammar-parser.cpp
similarity index 100%
rename from examples/grammar-parser.cpp
rename to common/grammar-parser.cpp
diff --git a/examples/grammar-parser.h b/common/grammar-parser.h
similarity index 100%
rename from examples/grammar-parser.h
rename to common/grammar-parser.h
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index d53652815..d2176c910 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -6,27 +6,6 @@ find_package(Threads REQUIRED)
 
 # ...
 
-# common
-
-set(TARGET common)
-
-add_library(${TARGET} OBJECT
-    common.h
-    common.cpp
-    console.h
-    console.cpp
-    grammar-parser.h
-    grammar-parser.cpp
-    )
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif()
-
-target_include_directories(${TARGET} PUBLIC .)
-target_compile_features(${TARGET} PUBLIC cxx_std_11)
-target_link_libraries(${TARGET} PRIVATE llama)
-
 # examples
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 60ace8765..06ce18f09 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -1,7 +1,6 @@
 #include "ggml.h"
 #include "build-info.h"
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
diff --git a/llama.cpp b/llama.cpp
index 2509c9dcf..87e2d5b11 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6,7 +6,6 @@
 #include <cstdio>
 #endif
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include "ggml.h"
@@ -277,7 +276,7 @@ struct llama_file {
         }
     }
 
-    uint32_t read_u32() {
+    uint32_t read_u32() const {
         uint32_t ret;
         read_raw(&ret, sizeof(ret));
         return ret;
@@ -559,10 +558,24 @@ struct llama_mlock {
 
 typedef void (*offload_func_t)(struct ggml_tensor * tensor);
 
-void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
+static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
+static std::string llama_token_to_text(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
 //
 // globals
 //
@@ -3286,16 +3299,16 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
     std::vector<llama_grammar_candidate> candidates_grammar;
 
     for (size_t i = 0; i < candidates->size; ++i) {
-        const llama_token id = candidates->data[i].id;
-        std::string str = llama_token_to_str(ctx, id);
+        const llama_token id   = candidates->data[i].id;
+        const std::string text = llama_token_to_text(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
             }
-        } else if (str.empty()) {
+        } else if (text.empty()) {
             candidates->data[i].logit = -INFINITY;
         } else {
-            candidates_decoded.push_back(decode_utf8(str.c_str(), grammar->partial_utf8));
+            candidates_decoded.push_back(decode_utf8(text.c_str(), grammar->partial_utf8));
             candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
         }
     }
@@ -3495,10 +3508,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    const std::string str = llama_token_to_str(ctx, token);
+    const std::string text = llama_token_to_text(ctx, token);
 
     // Note terminating 0 in decoded string
-    const auto decoded = decode_utf8(str.c_str(), grammar->partial_utf8);
+    const auto decoded = decode_utf8(text.c_str(), grammar->partial_utf8);
     const auto & code_points = decoded.first;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
@@ -5144,73 +5157,6 @@ const char * llama_print_system_info(void) {
 
     return s.c_str();
 }
-
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-        const std::string & text,
-        bool add_bos) {
-    // upper limit for the number of tokens
-    int n_tokens = text.length() + add_bos;
-    std::vector<llama_token> result(n_tokens);
-    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (n_tokens < 0) {
-        result.resize(-n_tokens);
-        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -n_tokens);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(n_tokens);
-    }
-    return result;
-}
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-        const std::string & text,
-        bool add_bos) {
-    int length = text.length() + add_bos;
-    std::vector<llama_token> result(length);
-    length = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
-std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
-
 // For internal test use
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
     return ctx->model.tensors_by_name;
diff --git a/llama.h b/llama.h
index 8e5c339d3..5126b8193 100644
--- a/llama.h
+++ b/llama.h
@@ -472,43 +472,16 @@ extern "C" {
 }
 #endif
 
-// C++ API, will be moving to common.h soon (TM)
-#ifdef LLAMA_API_CPP
+// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
+#ifdef LLAMA_API_INTERNAL
 
 #include <vector>
 #include <string>
 
-//
-// Vocab utils
-//
-
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-        const std::string & text,
-        bool add_bos);
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-        const std::string & text,
-        bool add_bos);
-
-std::string llama_token_to_str(
-        const struct llama_context * ctx,
-        llama_token token);
-
-std::string llama_token_to_str_bpe(
-        const struct llama_context * ctx,
-        llama_token token);
-
-// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
-#ifdef LLAMA_API_INTERNAL
-
 struct ggml_tensor;
 
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 
-#endif // LLAMA_API_CPP
-
 #endif // LLAMA_API_INTERNAL
 
 #endif // LLAMA_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d19ec5af0..4ccefe932 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,7 +2,7 @@ function(llama_build_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
 endfunction()
 
 function(llama_test_executable name source)
@@ -17,7 +17,7 @@ function(llama_build_and_test_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
 endfunction()
 
@@ -26,11 +26,11 @@ llama_build_and_test_executable(test-quantize-fns.cpp)
 llama_build_and_test_executable(test-quantize-perf.cpp)
 llama_build_and_test_executable(test-sampling.cpp)
 llama_build_executable(test-tokenizer-0.cpp)
-llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test_executable (test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_build_executable(test-tokenizer-1.cpp)
-llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test_executable (test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 #llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
-llama_build_and_test_executable(test-llama-grammar.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/common.cpp)
+llama_build_and_test_executable(test-grammar-parser.cpp)
+llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp
index 7022988b4..a0b5b043d 100644
--- a/tests/test-grammar-parser.cpp
+++ b/tests/test-grammar-parser.cpp
@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "examples/grammar-parser.cpp"
+#include "grammar-parser.h"
+
 #include <cassert>
 
 int main()
diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp
index 81c31e9e2..73dd33dd2 100644
--- a/tests/test-llama-grammar.cpp
+++ b/tests/test-llama-grammar.cpp
@@ -2,9 +2,9 @@
 #undef NDEBUG
 #endif
 
-#include "llama.cpp"
-#include "examples/common.cpp"
-#include "examples/grammar-parser.cpp"
"examples/grammar-parser.cpp" +#include "llama.cpp" // TODO: not great +#include "grammar-parser.h" + #include int main() diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp index e368ec3a6..81764565b 100644 --- a/tests/test-tokenizer-0.cpp +++ b/tests/test-tokenizer-0.cpp @@ -1,5 +1,5 @@ -#define LLAMA_API_CPP // TODO: eliminate me #include "llama.h" +#include "common.h" #include #include diff --git a/tests/test-tokenizer-1.cpp b/tests/test-tokenizer-1.cpp index 620e3f275..5841f7339 100644 --- a/tests/test-tokenizer-1.cpp +++ b/tests/test-tokenizer-1.cpp @@ -1,5 +1,5 @@ -#define LLAMA_API_CPP // TODO: eliminate me #include "llama.h" +#include "common.h" #include #include