Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-27 03:44:35 +00:00)

llama : remove C++ API + reorganize common source in /common dir

Commit 2d6c2c757c (parent 38016ed9ec)
CMakeLists.txt
@@ -497,9 +497,11 @@ else()
 endif()
 
 #
-# Build libraries
+# libraries
 #
 
+# ggml
+
 add_library(ggml OBJECT
             ggml.c
             ggml.h
@@ -524,6 +526,8 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS ggml_shared LIBRARY)
 endif()
 
+# llama
+
 add_library(llama
             llama.cpp
             llama.h
@@ -545,6 +549,10 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS llama LIBRARY)
 endif()
 
+#
+# install
+#
+
 include(GNUInstallDirs)
 install(
     FILES convert.py
@@ -583,6 +591,8 @@ endif()
 # programs, examples and tests
 #
 
+add_subdirectory(common)
+
 if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)
Makefile
@@ -46,7 +46,7 @@ else
 OPT = -O3
 endif
 CFLAGS   = -I.              $(OPT) -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
+CXXFLAGS = -I. -I./common   $(OPT) -std=c++11 -fPIC
 LDFLAGS  =
 
 ifdef LLAMA_DEBUG
@@ -332,13 +332,13 @@ OBJS += ggml-alloc.o
 llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-common.o: examples/common.cpp examples/common.h
+common.o: common/common.cpp common/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-console.o: examples/console.cpp examples/console.h
+console.o: common/console.cpp common/console.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
+grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 libllama.so: llama.o ggml.o $(OBJS)
@@ -388,7 +388,7 @@ embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-te
 gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
+train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp build-info.h ggml.o llama.o $(OBJS)
@@ -421,7 +421,7 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-grammar-parser: tests/test-grammar-parser.cpp examples/grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
common/CMakeLists.txt (new file)
@@ -0,0 +1,20 @@
+# common
+
+set(TARGET common)
+
+add_library(${TARGET} OBJECT
+    common.h
+    common.cpp
+    console.h
+    console.cpp
+    grammar-parser.h
+    grammar-parser.cpp
+    )
+
+if (BUILD_SHARED_LIBS)
+    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif()
+
+target_include_directories(${TARGET} PUBLIC .)
+target_compile_features(${TARGET} PUBLIC cxx_std_11)
+target_link_libraries(${TARGET} PRIVATE llama)
common/common.cpp
@@ -636,6 +636,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
     return "The";
 }
 
+//
+// Model utils
+//
+
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
 
@@ -689,3 +693,71 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
 
     return std::make_tuple(model, lctx);
 }
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos) {
+    // upper limit for the number of tokens
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos) {
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
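For context, a minimal usage sketch (not part of this commit) of how an example program might call the vocab helpers that now live in common/ instead of the removed C++ API in llama.h. It assumes the era's gpt_params exposes a model path field and that llama_backend_init, llama_free_model, and llama_backend_free have their then-current signatures; error handling is trimmed.

// Hypothetical usage sketch, not from the commit: tokenize a prompt via common.h.
#include "common.h"
#include "llama.h"

#include <cstdio>
#include <tuple>
#include <vector>

int main(int argc, char ** argv) {
    gpt_params params;
    if (argc > 1) {
        params.model = argv[1]; // path to a GGUF model file (field assumed from common.h)
    }

    llama_backend_init(false /* numa */);

    llama_model   * model;
    llama_context * ctx;
    std::tie(model, ctx) = llama_init_from_gpt_params(params);
    if (model == NULL || ctx == NULL) {
        fprintf(stderr, "failed to load model '%s'\n", params.model.c_str());
        return 1;
    }

    // the wrapper added above handles the negative-return "resize and retry" convention
    const std::vector<llama_token> tokens = llama_tokenize(ctx, "Hello world", true);
    for (const llama_token id : tokens) {
        printf("%6d -> '%s'\n", (int) id, llama_token_to_str(ctx, id).c_str());
    }

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();

    return 0;
}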
common/common.h
@@ -2,7 +2,6 @@
 
 #pragma once
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include <string>
@@ -105,3 +104,25 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 
 std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos);
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos);
+
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+                       llama_token   token);
+
+std::string llama_token_to_str_bpe(
+        const struct llama_context * ctx,
+                       llama_token   token);
examples/CMakeLists.txt
@@ -6,27 +6,6 @@ find_package(Threads REQUIRED)
 
 # ...
 
-# common
-
-set(TARGET common)
-
-add_library(${TARGET} OBJECT
-    common.h
-    common.cpp
-    console.h
-    console.cpp
-    grammar-parser.h
-    grammar-parser.cpp
-    )
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif()
-
-target_include_directories(${TARGET} PUBLIC .)
-target_compile_features(${TARGET} PUBLIC cxx_std_11)
-target_link_libraries(${TARGET} PRIVATE llama)
-
 # examples
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
(file header not captured in this view)
@@ -1,7 +1,6 @@
 #include "ggml.h"
 #include "build-info.h"
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
llama.cpp
@@ -6,7 +6,6 @@
 #include <cstdio>
 #endif
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include "ggml.h"
@@ -277,7 +276,7 @@ struct llama_file {
         }
     }
 
-    uint32_t read_u32() {
+    uint32_t read_u32() const {
         uint32_t ret;
         read_raw(&ret, sizeof(ret));
         return ret;
@@ -559,10 +558,24 @@ struct llama_mlock {
 
 typedef void (*offload_func_t)(struct ggml_tensor * tensor);
 
-void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
+static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
+static std::string llama_token_to_text(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
 //
 // globals
 //
@@ -3287,15 +3300,15 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
 
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id = candidates->data[i].id;
-        std::string str = llama_token_to_str(ctx, id);
+        const std::string text = llama_token_to_text(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
             }
-        } else if (str.empty()) {
+        } else if (text.empty()) {
             candidates->data[i].logit = -INFINITY;
         } else {
-            candidates_decoded.push_back(decode_utf8(str.c_str(), grammar->partial_utf8));
+            candidates_decoded.push_back(decode_utf8(text.c_str(), grammar->partial_utf8));
             candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
         }
     }
@@ -3495,10 +3508,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    const std::string str = llama_token_to_str(ctx, token);
+    const std::string text = llama_token_to_text(ctx, token);
 
     // Note terminating 0 in decoded string
-    const auto decoded = decode_utf8(str.c_str(), grammar->partial_utf8);
+    const auto decoded = decode_utf8(text.c_str(), grammar->partial_utf8);
     const auto & code_points = decoded.first;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
@@ -5144,73 +5157,6 @@ const char * llama_print_system_info(void) {
     return s.c_str();
 }
 
-
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos) {
-    // upper limit for the number of tokens
-    int n_tokens = text.length() + add_bos;
-    std::vector<llama_token> result(n_tokens);
-    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (n_tokens < 0) {
-        result.resize(-n_tokens);
-        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -n_tokens);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(n_tokens);
-    }
-    return result;
-}
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos) {
-    int length = text.length() + add_bos;
-    std::vector<llama_token> result(length);
-    length = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
-std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
 // For internal test use
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
     return ctx->model.tensors_by_name;
llama.h
@@ -472,43 +472,16 @@ extern "C" {
 }
 #endif
 
-// C++ API, will be moving to common.h soon (TM)
-#ifdef LLAMA_API_CPP
+// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
+#ifdef LLAMA_API_INTERNAL
 
 #include <vector>
 #include <string>
 
-//
-// Vocab utils
-//
-
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos);
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos);
-
-std::string llama_token_to_str(
-        const struct llama_context * ctx,
-                       llama_token   token);
-
-std::string llama_token_to_str_bpe(
-        const struct llama_context * ctx,
-                       llama_token   token);
-
-// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
-#ifdef LLAMA_API_INTERNAL
-
 struct ggml_tensor;
 
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 
-#endif // LLAMA_API_CPP
-
 #endif // LLAMA_API_INTERNAL
 
 #endif // LLAMA_H
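As a small illustration of what remains in llama.h after this change: only the internal test hooks stay behind a macro, while the C++ vocab helpers must now come from common.h. The helper below is a hypothetical sketch, not code from the tree; it only exercises llama_internal_get_tensor_map, which is declared in the diff above.

// Hypothetical sketch, not from the commit: a test/benchmark translation unit
// opting into the remaining internal API.
#define LLAMA_API_INTERNAL
#include "llama.h"

#include <cstdio>

// illustrative helper (not an API in the tree): list tensor names of a loaded context
static void dump_tensor_names(struct llama_context * ctx) {
    const auto & tensors = llama_internal_get_tensor_map(ctx);
    for (const auto & kv : tensors) {
        printf("%s\n", kv.first.c_str());
    }
}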
tests/CMakeLists.txt
@@ -2,7 +2,7 @@ function(llama_build_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
 endfunction()
 
 function(llama_test_executable name source)
@@ -17,7 +17,7 @@ function(llama_build_and_test_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
 endfunction()
 
@@ -26,11 +26,11 @@ llama_build_and_test_executable(test-quantize-fns.cpp)
 llama_build_and_test_executable(test-quantize-perf.cpp)
 llama_build_and_test_executable(test-sampling.cpp)
 llama_build_executable(test-tokenizer-0.cpp)
-llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test_executable (test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_build_executable(test-tokenizer-1.cpp)
-llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test_executable (test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 #llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
-llama_build_and_test_executable(test-llama-grammar.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/common.cpp)
+llama_build_and_test_executable(test-grammar-parser.cpp)
+llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
tests/test-grammar-parser.cpp
@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "examples/grammar-parser.cpp"
+#include "grammar-parser.h"
 
 #include <cassert>
 
 int main()
tests/test-llama-grammar.cpp
@@ -2,9 +2,9 @@
 #undef NDEBUG
 #endif
 
-#include "llama.cpp"
-#include "examples/common.cpp"
-#include "examples/grammar-parser.cpp"
+#include "llama.cpp" // TODO: not great
+#include "grammar-parser.h"
 
 #include <cassert>
 
 int main()
tests/test-tokenizer-0.cpp
@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cstdio>
 #include <string>
tests/test-tokenizer-1.cpp
@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cassert>
 #include <cstdio>