llama : remove C++ API + reorganize common source in /common dir

Georgi Gerganov 2023-08-18 16:22:48 +03:00
parent 38016ed9ec
commit 2d6c2c757c
GPG Key ID: 449E073F9DC10735 (no known key found for this signature in the database)
18 changed files with 169 additions and 148 deletions
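
In practice, callers no longer define LLAMA_API_CPP to pull the C++ helpers out of llama.h; they link the new `common` library and include common/common.h instead. A minimal sketch of such a caller (hypothetical program, not part of this commit; assumes the backend init/free calls of this llama.cpp revision):

    // hypothetical example program; link against both `llama` and `common`
    #include "common.h"   // llama_tokenize, llama_token_to_str, gpt_params, ... now live here
    #include "llama.h"

    #include <cstdio>
    #include <tuple>
    #include <vector>

    int main() {
        gpt_params params;                              // declared in common/common.h
        params.model = "models/7B/ggml-model.gguf";     // hypothetical model path

        llama_backend_init(false);                      // assumed init call for this llama.cpp revision

        llama_model   * model;
        llama_context * ctx;
        std::tie(model, ctx) = llama_init_from_gpt_params(params);   // declared in common/common.h
        if (model == NULL || ctx == NULL) {
            return 1;
        }

        // the C++ tokenizer wrappers moved from llama.cpp into common/common.cpp
        const std::vector<llama_token> tokens = llama_tokenize(ctx, "Hello world", true);
        for (const llama_token t : tokens) {
            printf("%6d -> '%s'\n", t, llama_token_to_str(ctx, t).c_str());
        }

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();

        return 0;
    }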

CMakeLists.txt

@@ -497,9 +497,11 @@ else()
 endif()
 
 #
-# Build libraries
+# libraries
 #
 
+# ggml
+
 add_library(ggml OBJECT
             ggml.c
             ggml.h
@@ -524,6 +526,8 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS ggml_shared LIBRARY)
 endif()
 
+# llama
+
 add_library(llama
             llama.cpp
             llama.h
@@ -545,6 +549,10 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS llama LIBRARY)
 endif()
 
+#
+# install
+#
+
 include(GNUInstallDirs)
 install(
     FILES convert.py
@@ -583,6 +591,8 @@ endif()
 # programs, examples and tests
 #
 
+add_subdirectory(common)
+
 if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)

Makefile

@@ -46,7 +46,7 @@ else
 OPT = -O3
 endif
 CFLAGS   = -I.              $(OPT) -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
+CXXFLAGS = -I. -I./common   $(OPT) -std=c++11 -fPIC
 LDFLAGS  =
 
 ifdef LLAMA_DEBUG
@@ -332,13 +332,13 @@ OBJS += ggml-alloc.o
 llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-common.o: examples/common.cpp examples/common.h
+common.o: common/common.cpp common/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-console.o: examples/console.cpp examples/console.h
+console.o: common/console.cpp common/console.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
+grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 libllama.so: llama.o ggml.o $(OBJS)
@@ -388,7 +388,7 @@ embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-te
 gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
+train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp build-info.h ggml.o llama.o $(OBJS)
@@ -421,7 +421,7 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-grammar-parser: tests/test-grammar-parser.cpp examples/grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)

common/CMakeLists.txt (new file, +20 lines)

@@ -0,0 +1,20 @@
+# common
+
+set(TARGET common)
+
+add_library(${TARGET} OBJECT
+    common.h
+    common.cpp
+    console.h
+    console.cpp
+    grammar-parser.h
+    grammar-parser.cpp
+    )
+
+if (BUILD_SHARED_LIBS)
+    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif()
+
+target_include_directories(${TARGET} PUBLIC .)
+target_compile_features(${TARGET} PUBLIC cxx_std_11)
+target_link_libraries(${TARGET} PRIVATE llama)
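
For orientation, a consumer target links this object library the same way the updated tests/CMakeLists.txt later in this commit does; a minimal CMake sketch with a hypothetical target name:

    # hypothetical example target; `llama` and `common` are the targets defined by this repo's build
    add_executable(my-example my-example.cpp)
    target_link_libraries(my-example PRIVATE llama common)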

common/common.cpp

@@ -636,6 +636,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
     return "The";
 }
 
+//
+// Model utils
+//
+
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
@@ -689,3 +693,71 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     return std::make_tuple(model, lctx);
 }
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool   add_bos) {
+    // upper limit for the number of tokens
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return std::string(result.data(), result.size());
+}
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool   add_bos) {
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return std::string(result.data(), result.size());
+}

common/common.h

@@ -2,7 +2,6 @@
 #pragma once
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include <string>
@@ -105,3 +104,25 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool   add_bos);
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+        const std::string & text,
+        bool   add_bos);
+
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+        llama_token token);
+
+std::string llama_token_to_str_bpe(
+        const struct llama_context * ctx,
+        llama_token token);

examples/CMakeLists.txt

@@ -6,27 +6,6 @@ find_package(Threads REQUIRED)
 
 # ...
 
-# common
-
-set(TARGET common)
-
-add_library(${TARGET} OBJECT
-    common.h
-    common.cpp
-    console.h
-    console.cpp
-    grammar-parser.h
-    grammar-parser.cpp
-    )
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif()
-
-target_include_directories(${TARGET} PUBLIC .)
-target_compile_features(${TARGET} PUBLIC cxx_std_11)
-target_link_libraries(${TARGET} PRIVATE llama)
-
 # examples
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})

examples/quantize-stats/quantize-stats.cpp

@@ -1,7 +1,6 @@
 #include "ggml.h"
 #include "build-info.h"
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #define LLAMA_API_INTERNAL
 #include "llama.h"

llama.cpp

@@ -6,7 +6,6 @@
 #include <cstdio>
 #endif
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include "ggml.h"
@@ -277,7 +276,7 @@ struct llama_file {
         }
     }
 
-    uint32_t read_u32() {
+    uint32_t read_u32() const {
         uint32_t ret;
         read_raw(&ret, sizeof(ret));
         return ret;
@@ -559,10 +558,24 @@ struct llama_mlock {
 
 typedef void (*offload_func_t)(struct ggml_tensor * tensor);
 
-void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
+static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
+static std::string llama_token_to_text(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
 //
 // globals
 //
@@ -3287,15 +3300,15 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id = candidates->data[i].id;
-        std::string str = llama_token_to_str(ctx, id);
+        const std::string text = llama_token_to_text(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
             }
-        } else if (str.empty()) {
+        } else if (text.empty()) {
             candidates->data[i].logit = -INFINITY;
         } else {
-            candidates_decoded.push_back(decode_utf8(str.c_str(), grammar->partial_utf8));
+            candidates_decoded.push_back(decode_utf8(text.c_str(), grammar->partial_utf8));
             candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
         }
     }
@@ -3495,10 +3508,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    const std::string str = llama_token_to_str(ctx, token);
+    const std::string text = llama_token_to_text(ctx, token);
 
     // Note terminating 0 in decoded string
-    const auto decoded = decode_utf8(str.c_str(), grammar->partial_utf8);
+    const auto decoded = decode_utf8(text.c_str(), grammar->partial_utf8);
     const auto & code_points = decoded.first;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
@@ -5144,73 +5157,6 @@ const char * llama_print_system_info(void) {
     return s.c_str();
 }
 
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-        const std::string & text,
-        bool   add_bos) {
-    // upper limit for the number of tokens
-    int n_tokens = text.length() + add_bos;
-    std::vector<llama_token> result(n_tokens);
-    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (n_tokens < 0) {
-        result.resize(-n_tokens);
-        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -n_tokens);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(n_tokens);
-    }
-    return result;
-}
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-        const std::string & text,
-        bool   add_bos) {
-    int length = text.length() + add_bos;
-    std::vector<llama_token> result(length);
-    length = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-    return std::string(result.data(), result.size());
-}
-
-std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-    return std::string(result.data(), result.size());
-}
-
 // For internal test use
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
     return ctx->model.tensors_by_name;

llama.h

@@ -472,43 +472,16 @@ extern "C" {
 }
 #endif
 
-// C++ API, will be moving to common.h soon (TM)
-#ifdef LLAMA_API_CPP
+// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
+#ifdef LLAMA_API_INTERNAL
 
 #include <vector>
 #include <string>
 
-//
-// Vocab utils
-//
-
-std::vector<llama_token> llama_tokenize(
-    struct llama_context * ctx,
-    const std::string & text,
-    bool   add_bos);
-
-std::vector<llama_token> llama_tokenize_bpe(
-    struct llama_context * ctx,
-    const std::string & text,
-    bool   add_bos);
-
-std::string llama_token_to_str(
-    const struct llama_context * ctx,
-    llama_token token);
-
-std::string llama_token_to_str_bpe(
-    const struct llama_context * ctx,
-    llama_token token);
-
-// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
-#ifdef LLAMA_API_INTERNAL
-
 struct ggml_tensor;
 
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 
-#endif // LLAMA_API_CPP
 #endif // LLAMA_API_INTERNAL
 
 #endif // LLAMA_H
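
With the vocab helpers gone, the only C++ surface left in llama.h sits behind LLAMA_API_INTERNAL, so a test or benchmark opts in before including the header. A minimal sketch of such a consumer (hypothetical file; llama_internal_get_tensor_map is the one entry point behind the guard):

    #define LLAMA_API_INTERNAL   // expose the internal C++ helpers (tests/benchmarks only)
    #include "llama.h"

    #include <cstdio>

    // print the name of every tensor the loaded model exposes via the internal map
    static void dump_tensor_names(struct llama_context * ctx) {
        for (const auto & kv : llama_internal_get_tensor_map(ctx)) {
            printf("%s\n", kv.first.c_str());
        }
    }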

tests/CMakeLists.txt

@@ -2,7 +2,7 @@ function(llama_build_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
 endfunction()
 
 function(llama_test_executable name source)
@@ -17,7 +17,7 @@ function(llama_build_and_test_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
 endfunction()
 
@@ -26,11 +26,11 @@ llama_build_and_test_executable(test-quantize-fns.cpp)
 llama_build_and_test_executable(test-quantize-perf.cpp)
 llama_build_and_test_executable(test-sampling.cpp)
 llama_build_executable(test-tokenizer-0.cpp)
-llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test_executable (test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_build_executable(test-tokenizer-1.cpp)
-llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test_executable (test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 #llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
-llama_build_and_test_executable(test-llama-grammar.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/common.cpp)
+llama_build_and_test_executable(test-grammar-parser.cpp)
+llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW

tests/test-grammar-parser.cpp

@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "examples/grammar-parser.cpp"
+#include "grammar-parser.h"
+
 #include <cassert>
 
 int main()

tests/test-llama-grammar.cpp

@@ -2,9 +2,9 @@
 #undef NDEBUG
 #endif
 
-#include "llama.cpp"
-#include "examples/common.cpp"
-#include "examples/grammar-parser.cpp"
+#include "llama.cpp" // TODO: not great
+#include "grammar-parser.h"
+
 #include <cassert>
 
 int main()

tests/test-tokenizer-0.cpp

@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cstdio>
 #include <string>

tests/test-tokenizer-1.cpp

@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cassert>
 #include <cstdio>