fix some warnings from gcc and clang-tidy (#3038)

Co-authored-by: xaedes <xaedes@gmail.com>
commit 00d62adb79
parent 4fa2cc1750
Author: Cebtenzzre
Date:   2023-09-07 13:22:29 -04:00
22 changed files with 63 additions and 101 deletions


@@ -3,6 +3,7 @@ Checks: >
     bugprone-*,
     -bugprone-easily-swappable-parameters,
     -bugprone-implicit-widening-of-multiplication-result,
+    -bugprone-misplaced-widening-cast,
     -bugprone-narrowing-conversions,
     readability-*,
     -readability-avoid-unconditional-preprocessor-if,
@@ -15,4 +16,8 @@ Checks: >
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
     portability-*,
+    misc-*,
+    -misc-const-correctness,
+    -misc-non-private-member-variables-in-classes,
+    -misc-no-recursion,
 FormatStyle: none
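For context: this hunk disables bugprone-misplaced-widening-cast project-wide. The check targets widening casts applied after a narrower computation, where the arithmetic can overflow before the cast takes effect. A standalone illustration (not part of the diff):

    #include <cstdint>

    int64_t total_elements(int32_t rows, int32_t cols) {
        // flagged: the multiply is done in 32 bits and may overflow
        // before the cast widens the already-wrapped result
        int64_t bad = (int64_t)(rows * cols);
        (void)bad;
        // not flagged: widening one operand first makes the multiply 64-bit
        return (int64_t)rows * cols;
    }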


@@ -426,7 +426,7 @@ if (LLAMA_ALL_WARNINGS)
         )
         if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
             # g++ only
-            set(cxx_flags ${cxx_flags} -Wno-format-truncation)
+            set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
         endif()
     else()
         # todo : msvc
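For reference, -Wno-array-bounds joins the existing -Wno-format-truncation suppression for g++ only (the Makefile below gets the same change). -Wformat-truncation fires when a formatted write can exceed the destination buffer; a standalone illustration of the pattern it warns about (not part of the diff):

    #include <cstdio>

    void tensor_name(char (&buf)[8], int id) {
        // g++ can warn here: "tensor-" plus the digits of id plus the
        // terminator may not fit in 8 bytes, so the output may be truncated
        snprintf(buf, sizeof(buf), "tensor-%d", id);
    }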


@@ -134,7 +134,7 @@ MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-m
 
 ifeq '' '$(findstring clang++,$(CXX))'
 	# g++ only
-	MK_CXXFLAGS += -Wno-format-truncation
+	MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
 endif
 
 # OS specific


@@ -57,7 +57,7 @@ int32_t get_num_physical_cores() {
             siblings.insert(line);
         }
     }
-    if (siblings.size() > 0) {
+    if (!siblings.empty()) {
         return static_cast<int32_t>(siblings.size());
     }
 #elif defined(__APPLE__) && defined(__MACH__)
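This hunk, and several below, replace size() > 0 (or a bare size() in boolean context) with empty(), per clang-tidy's readability-container-size-empty: empty() states the intent directly and is guaranteed O(1) for every standard container. A standalone illustration (not part of the diff):

    #include <set>
    #include <string>

    bool has_siblings(const std::set<std::string> & siblings) {
        return !siblings.empty();   // preferred: intent is explicit
        // flagged equivalents: siblings.size() > 0, siblings.size() != 0
    }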


@@ -20,6 +20,9 @@
 #define DIRECTORY_SEPARATOR '/'
 #endif // _WIN32
 
+#define die(msg)          do { fputs("error: " msg "\n", stderr);                exit(1); } while (0)
+#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", ##__VA_ARGS__); exit(1); } while (0)
+
 //
 // CLI argument parsing
 //
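These macros centralize the print-an-error-and-exit pattern adopted throughout the diffs below; the do { ... } while (0) wrapper keeps each macro statement-safe inside unbraced if/else. A hypothetical caller (assumes common.h is included):

    #include <cerrno>
    #include <cstdio>
    #include <cstring>

    static FILE * must_open(const char * path) {
        FILE * fp = std::fopen(path, "rb");
        if (fp == NULL) {
            // formatted variant; the plain die("...") takes a fixed message
            die_fmt("failed to open %s: %s", path, std::strerror(errno));
        }
        return fp;
    }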


@@ -415,6 +415,7 @@ namespace grammar_parser {
 
     std::vector<const llama_grammar_element *> parse_state::c_rules() {
         std::vector<const llama_grammar_element *> ret;
+        ret.reserve(rules.size());
         for (const auto & rule : rules) {
             ret.push_back(rule.data());
         }
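reserve() sizes the vector's storage once up front, so the push_back calls in the loop never reallocate; clang-tidy likely reports the unreserved pattern via performance-inefficient-vector-operation. A standalone illustration (not part of the diff):

    #include <vector>

    std::vector<int> doubled(const std::vector<int> & xs) {
        std::vector<int> out;
        out.reserve(xs.size());    // single allocation
        for (int x : xs) {
            out.push_back(x * 2);  // no reallocation during the loop
        }
        return out;
    }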


@@ -1,5 +1,6 @@
 #include "ggml.h"
 #include "llama.h"
+#include "common.h"
 
 #include <unordered_map>
 #include <vector>
@@ -499,10 +500,10 @@ struct llama_file {
         errno = 0;
         std::size_t ret = std::fread(ptr, size, 1, fp);
         if (ferror(fp)) {
-            throw std::runtime_error(format("read error: %s", strerror(errno)));
+            die_fmt("fread failed: %s", strerror(errno));
         }
         if (ret != 1) {
-            throw std::runtime_error(std::string("unexpectedly reached end of file"));
+            die("unexpectedly reached end of file");
         }
     }
@@ -597,8 +598,7 @@ void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
     printf("Assuming llama2.c vocabulary since %s is not a gguf file\n", filename);
     llama_file file(filename, "rb");
     if (!file.fp) {
-        fprintf(stderr, "error: %s: %s\n", strerror(errno), filename);
-        exit(1);
+        die_fmt("%s: %s", strerror(errno), filename);
     }
     const int n_vocab = config->vocab_size;
     /* uint32_t max_token_length = */ file.read_u32(); // unused


@@ -23,7 +23,7 @@ extern "C" {
 
 struct MyModel* create_mymodel(int argc, char ** argv) {
     gpt_params params;
 
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return nullptr;
     }
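Comparing a bool against a literal is redundant, and clang-tidy's readability-simplify-boolean-expr prefers direct negation; the same one-line change recurs in most of the hunks below. A standalone illustration (not part of the diff):

    bool parse_args(int argc, char ** argv);

    int run(int argc, char ** argv) {
        if (!parse_args(argc, argv)) {   // preferred
            return 1;
        }
        // flagged form: if (parse_args(argc, argv) == false)
        return 0;
    }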


@@ -11,7 +11,7 @@
 
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }


@@ -953,7 +953,7 @@ int main(int argc, char ** argv) {
 
     gpt_params params;
 
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }


@@ -925,7 +925,7 @@ int main(int argc, char ** argv) {
 
     gpt_params params;
 
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }


@@ -48,8 +48,9 @@ static bool is_interacting = false;
 
 void write_logfile(
     const llama_context * ctx, const gpt_params & params, const llama_model * model,
-    const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
+    const std::vector<llama_token> & input_tokens, const std::string & output,
+    const std::vector<llama_token> & output_tokens
+) {
 
     if (params.logdir.empty()) {
         return;
     }
@@ -109,7 +110,7 @@ int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
 
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
@@ -303,7 +304,7 @@ int main(int argc, char ** argv) {
     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
-    if (session_tokens.size() > 0) {
+    if (!session_tokens.empty()) {
         for (llama_token id : session_tokens) {
             if (n_matching_session_tokens >= embd_inp.size() || id != embd_inp[n_matching_session_tokens]) {
                 break;
@@ -401,7 +402,7 @@ int main(int argc, char ** argv) {
         LOG_TEE("%s: interactive mode on.\n", __func__);
 
-        if (params.antiprompt.size()) {
+        if (!params.antiprompt.empty()) {
             for (const auto & antiprompt : params.antiprompt) {
                 LOG_TEE("Reverse prompt: '%s'\n", antiprompt.c_str());
             }
@@ -499,7 +500,7 @@ int main(int argc, char ** argv) {
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
-        if (embd.size() > 0) {
+        if (!embd.empty()) {
             // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via
             // --prompt or --file which uses the same value.
             int max_embd_size = n_ctx - 4;
@@ -624,7 +625,7 @@ int main(int argc, char ** argv) {
                 LOG("n_past = %d\n", n_past);
             }
 
-            if (embd.size() > 0 && !path_session.empty()) {
+            if (!embd.empty() && !path_session.empty()) {
                 session_tokens.insert(session_tokens.end(), embd.begin(), embd.end());
                 n_session_consumed = session_tokens.size();
             }
@@ -695,7 +696,7 @@ int main(int argc, char ** argv) {
         // if not currently processing queued inputs;
         if ((int) embd_inp.size() <= n_consumed) {
             // check for reverse prompt
-            if (params.antiprompt.size()) {
+            if (!params.antiprompt.empty()) {
                 std::string last_output;
                 for (auto id : last_tokens) {
                     last_output += llama_token_to_piece(ctx, id);
@@ -732,7 +733,7 @@ int main(int argc, char ** argv) {
             LOG("found EOS token\n");
 
             if (params.interactive) {
-                if (params.antiprompt.size() != 0) {
+                if (!params.antiprompt.empty()) {
                     // tokenize and inject first reverse prompt
                     const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
                     embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
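The signature change above stops copying two token vectors and the output string on every call; clang-tidy's performance-unnecessary-value-param suggests const references for parameters that are only read. The same fix appears in the quantize-stats, server, and benchmark hunks below. A standalone illustration (not part of the diff):

    #include <string>
    #include <vector>

    // flagged: both arguments are copied at each call site
    void log_by_value(std::vector<int> tokens, std::string text);

    // preferred: read-only access, no copies
    void log_by_ref(const std::vector<int> & tokens, const std::string & text);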


@@ -655,7 +655,7 @@ int main(int argc, char ** argv) {
     gpt_params params;
     params.n_batch = 512;
 
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }


@@ -71,7 +71,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
 }
 
 // Check if a layer is included/excluded by command line
-bool layer_included(const quantize_stats_params params, const std::string & layer) {
+bool layer_included(const quantize_stats_params & params, const std::string & layer) {
     for (const auto& excluded : params.exclude_layers) {
         if (std::regex_search(layer, std::regex(excluded))) {
             return false;


@@ -143,10 +143,9 @@ int main(int argc, char ** argv) {
         if (!try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) {
             fprintf(stderr, "%s: invalid ftype '%s'\n", __func__, argv[3]);
             return 1;
-        } else {
-            if (ftype_str == "COPY") {
-                params.only_copy = true;
-            }
+        }
+        if (ftype_str == "COPY") {
+            params.only_copy = true;
         }
         arg_idx++;
     }
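Because the if branch returns, the else wrapper was dead structure; readability-else-after-return prefers unindenting the follow-on code. A standalone illustration (not part of the diff):

    int check_ftype(bool ok, bool is_copy, bool * only_copy) {
        if (!ok) {
            return 1;
        }
        // reachable only when ok, so no else is needed
        *only_copy = is_copy;
        return 0;
    }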


@@ -13,7 +13,7 @@ int main(int argc, char ** argv) {
     params.repeat_last_n = 64;
     params.prompt = "The quick brown fox";
 
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
         return 1;
     }
@@ -44,7 +44,7 @@ int main(int argc, char ** argv) {
         llama_free_model(model);
         return 1;
     }
-    auto tokens = llama_tokenize(ctx, params.prompt.c_str(), true);
+    auto tokens = llama_tokenize(ctx, params.prompt, true);
     auto n_prompt_tokens = tokens.size();
     if (n_prompt_tokens < 1) {
         fprintf(stderr, "%s : failed to tokenize prompt\n", __func__);


@@ -139,7 +139,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
 }
 
 // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> probs)
+static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
 {
     json out = json::array();
     for (const auto &prob : probs)
@@ -271,7 +271,7 @@ struct llama_server_context
         return true;
     }
 
-    std::vector<llama_token> tokenize(json json_prompt, bool add_bos)
+    std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
     {
         // If `add_bos` is true, we only add BOS, when json_prompt is a string,
         // or the first element of the json_prompt array is a string.
@@ -611,7 +611,7 @@ struct llama_server_context
     completion_token_output doCompletion()
     {
-        const completion_token_output token_with_probs = nextToken();
+        auto token_with_probs = nextToken();
 
         const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
         generated_text += token_text;
@@ -1255,7 +1255,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
 struct token_translator {
     llama_context * ctx;
     std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
-    std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); }
+    std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); }
 };
 
 void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) {


@@ -169,10 +169,6 @@ struct my_llama_hparams {
     float rope_freq_base = 10000.0f;
     float rope_freq_scale = 1.0f;
-
-    bool operator!=(const my_llama_hparams& other) const {
-        return memcmp(this, &other, sizeof(my_llama_hparams));
-    }
 };
 
 struct my_llama_layer {
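The removed operator!= compared whole structs with memcmp, which checks such as bugprone-suspicious-memory-comparison flag: alignment padding between members holds indeterminate bytes, so two logically equal objects can compare unequal. A standalone illustration with a hypothetical struct (not the one from the hunk):

    #include <cstring>

    struct hp {
        char  tag;    // padding bytes typically follow to align the float
        float scale;
    };

    bool same_bad(const hp & a, const hp & b) {
        return std::memcmp(&a, &b, sizeof(hp)) == 0;  // also compares padding
    }

    bool same_good(const hp & a, const hp & b) {
        return a.tag == b.tag && a.scale == b.scale;  // member-wise, padding-safe
    }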
@@ -929,28 +925,6 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa
     }
 }
 
-#ifdef __GNUC__
-#ifdef __MINGW32__
-__attribute__((format(gnu_printf, 1, 2)))
-#else
-__attribute__((format(printf, 1, 2)))
-#endif
-#endif
-static std::string format(const char * fmt, ...) {
-    va_list ap, ap2;
-    va_start(ap, fmt);
-    va_copy(ap2, ap);
-    int size = vsnprintf(NULL, 0, fmt, ap);
-    GGML_ASSERT(size >= 0 && size < INT_MAX);
-    std::vector<char> buf(size + 1);
-    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
-    GGML_ASSERT(size2 == size);
-    va_end(ap2);
-    va_end(ap);
-    return std::string(buf.data(), size);
-}
-
 int tokenize_file(struct llama_context * lctx, const char * filename, std::vector<llama_token>& out) {
     FILE * fp = std::fopen(filename, "rb");
     if (fp == NULL) {
@@ -983,10 +957,10 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     out.resize(size+1);
 
     if (std::fread(buf.data(), size, 1, fp) != 1) {
-        throw std::runtime_error(std::string("unexpectedly reached end of file"));
+        die("unexpectedly reached end of file");
     }
     if (ferror(fp)) {
-        throw std::runtime_error(format("read error: %s", strerror(errno)));
+        die_fmt("fread failed: %s", strerror(errno));
     }
 
     buf[size] = '\0';
@@ -1047,11 +1021,11 @@ void shuffle_ints(int * begin, int * end) {
         if (kid >= 0) { \
             enum gguf_type ktype = gguf_get_kv_type(ctx, kid); \
             if (ktype != (type)) { \
-                throw std::runtime_error(format("key %s has wrong type: %s", skey.c_str(), gguf_type_name(ktype))); \
+                die_fmt("key %s has wrong type: %s", skey.c_str(), gguf_type_name(ktype)); \
             } \
             (dst) = func(ctx, kid); \
         } else if (req) { \
-            throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
+            die_fmt("key not found in model: %s", skey.c_str()); \
         } \
     }
@@ -1136,7 +1110,7 @@ void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_g
         read_tensor_by_name(opt->lbfgs.lms, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S);
         read_tensor_by_name(opt->lbfgs.lmy, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y);
     } else {
-        throw std::runtime_error("unknown optimizer type\n");
+        die("unknown optimizer type");
     }
 }
@@ -1315,20 +1289,20 @@ void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_mod
     const int token_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_LIST));
     if (token_idx == -1) {
-        throw std::runtime_error("cannot find tokenizer vocab in model file\n");
+        die("cannot find tokenizer vocab in model file");
     }
     const uint32_t n_vocab = gguf_get_arr_n(vctx, token_idx);
 
     const int score_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_SCORES));
     if (score_idx == -1) {
-        throw std::runtime_error("cannot find tokenizer scores in model file\n");
+        die("cannot find tokenizer scores in model file");
     }
 
     const float * scores = (const float * ) gguf_get_arr_data(vctx, score_idx);
 
     const int toktype_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE));
     if (toktype_idx == -1) {
-        throw std::runtime_error("cannot find token type list in GGUF file\n");
+        die("cannot find token type list in GGUF file");
     }
 
     const int * toktypes = (const int * ) gguf_get_arr_data(vctx, toktype_idx);
@@ -1356,7 +1330,7 @@ void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_mod
         // read and copy bpe merges
         const int merges_keyidx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_MERGES));
         if (merges_keyidx == -1) {
-            throw std::runtime_error("cannot find tokenizer merges in model file\n");
+            die("cannot find tokenizer merges in model file");
         }
 
         const int n_merges = gguf_get_arr_n(vctx, merges_keyidx);
@@ -1988,7 +1962,7 @@ void opt_callback(void * vdata, float * sched) {
         float min_sched = params->adam_min_alpha / params->adam_alpha;
         *sched = min_sched + *sched * (1.0f - min_sched);
 
-        int impr_plot = std::isnan(opt->loss_after) ? 0 : -(int)(1 + (opt->loss_before - opt->loss_after) * 10.0f + 0.5f);
+        int impr_plot = std::isnan(opt->loss_after) ? 0 : -std::lround(1 + (opt->loss_before - opt->loss_after) * 10.0f);
 
         printf("%s: iter=%*d, sched=%f loss0=%f loss=%f | improvement: %*d>\n", __func__, 6, opt->iter, *sched, opt->loss_before, opt->loss_after, impr_plot, (int)0);
 
         if (data->shuffle_countdown < n_batch) {
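The last hunk replaces the add-0.5-then-truncate idiom with std::lround. Truncation rounds toward zero, so adding 0.5 mis-rounds negative values, a pattern flagged by bugprone-incorrect-roundings. A standalone illustration (not part of the diff):

    #include <cmath>

    int  round_old(float x) { return (int)(x + 0.5f); }  // round_old(-1.2f) == 0, wrong
    long round_new(float x) { return std::lround(x); }   // round_new(-1.2f) == -1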


@@ -138,7 +138,7 @@ static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_ten
 void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
 #ifdef GGML_ALLOCATOR_DEBUG
-    GGML_ASSERT(ggml_is_view(tensor) == false); // views generally get data pointer from one of their sources
+    GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
     GGML_ASSERT(tensor->data == NULL); // avoid allocating tensor which already has memory allocated
 #endif
     size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
@@ -165,14 +165,14 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
     if (best_fit_block == -1) {
         // the last block is our last resort
         struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
+        max_avail = MAX(max_avail, block->size);
         if (block->size >= size) {
             best_fit_block = alloc->n_free_blocks - 1;
-            max_avail = MAX(max_avail, block->size);
         } else {
             fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
                     __func__, size, max_avail);
             GGML_ASSERT(!"not enough space in the buffer");
             return;
         }
     }
 
     struct free_block * block = &alloc->free_blocks[best_fit_block];
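As I read the hunk above, max_avail was previously updated only on the success path, so the "largest block available" figure printed on allocation failure had not yet accounted for the final block; hoisting the update above the branch makes the diagnostic accurate either way.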

ggml.c

@@ -4768,7 +4768,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     size_t obj_alloc_size = 0;
 
-    if (view_src == NULL && ctx->no_alloc == false) {
+    if (view_src == NULL && !ctx->no_alloc) {
         if (ctx->scratch.data != NULL) {
             // allocate tensor data in the scratch buffer
             if (ctx->scratch.offs + data_size > ctx->scratch.size) {
@@ -5469,7 +5469,7 @@ static struct ggml_tensor * ggml_mul_impl(
     }
 
     if (inplace) {
-        GGML_ASSERT(is_node == false);
+        GGML_ASSERT(!is_node);
     }
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
@@ -5512,7 +5512,7 @@ static struct ggml_tensor * ggml_div_impl(
     }
 
     if (inplace) {
-        GGML_ASSERT(is_node == false);
+        GGML_ASSERT(!is_node);
     }
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
@@ -19957,7 +19957,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         struct ggml_tensor * data = NULL;
 
-        if (params.no_alloc == false) {
+        if (!params.no_alloc) {
             data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
 
             ok = ok && data != NULL;
@@ -19998,7 +19998,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             }
 
             // point the data member to the appropriate location in the binary blob using the tensor infos
-            if (params.no_alloc == false) {
+            if (!params.no_alloc) {
                 //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
                 cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
             }


@@ -3052,33 +3052,10 @@ static bool llama_is_control_token(const llama_vocab & vocab, llama_token id) {
     return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_CONTROL;
 }
 
-static bool llama_is_user_defined_token(const llama_vocab & vocab, llama_token id) {
-    return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_USER_DEFINED;
-}
-
-static bool llama_is_unused_token(const llama_vocab & vocab, llama_token id) {
-    return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_UNUSED;
-}
-
 static bool llama_is_byte_token(const llama_vocab & vocab, llama_token id) {
     return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_BYTE;
 }
 
-static bool llama_is_bos_token(const llama_vocab & vocab, llama_token id) {
-    GGML_ASSERT(llama_is_control_token(vocab, id));
-    return id == vocab.special_bos_id;
-}
-
-static bool llama_is_eos_token(const llama_vocab & vocab, llama_token id ) {
-    GGML_ASSERT(llama_is_control_token(vocab, id));
-    return id == vocab.special_eos_id;
-}
-
-static bool llama_is_pad_token(const llama_vocab & vocab, llama_token id ) {
-    GGML_ASSERT(id < 0 || llama_is_control_token(vocab, id));
-    return id == vocab.special_pad_id;
-}
-
 static uint8_t llama_token_to_byte(const llama_vocab & vocab, llama_token id) {
     GGML_ASSERT(llama_is_byte_token(vocab, id));
     const auto& token_data = vocab.id_to_token.at(id);
@@ -4800,9 +4777,11 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     std::vector<std::thread> workers;
     std::mutex mutex;
 
+#ifdef GGML_USE_K_QUANTS
     auto use_more_bits = [] (int i_layer, int num_layers) -> bool {
         return i_layer < num_layers/8 || i_layer >= 7*num_layers/8 || (i_layer - num_layers/8)%3 == 2;
     };
+#endif
 
     int idx = 0;
@@ -5947,7 +5926,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         rng_ss.str(std::string(&rng_buf[0], rng_size));
         rng_ss >> ctx->rng;
 
-        GGML_ASSERT(rng_ss.fail() == false);
+        GGML_ASSERT(!rng_ss.fail());
     }
 
     // set logits
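The deleted helpers were static functions with no remaining callers, which gcc and clang report under -Wunused-function; use_more_bits survives but is wrapped in GGML_USE_K_QUANTS, presumably because its only caller sits under the same guard. A standalone illustration of the pattern (not part of the diff):

    // -Wunused-function: a file-scope helper nobody calls
    static int unused_helper(int x) { return x + 1; }

    #ifdef USE_FEATURE
    // compiled only when its caller is, so no warning without the feature
    static int feature_helper(int x) { return x + 1; }
    #endif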


@@ -76,7 +76,7 @@ void * align_with_offset(void * ptr, int offset) {
     return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
 }
 
-void benchmark_function(size_t size, size_t q_size, int64_t iterations, std::function<size_t(void)> function) {
+void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
     int64_t min_time_us = INT64_MAX;
     int64_t total_time_us = 0;
     int64_t min_time_cycles = INT64_MAX;
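std::function is a particularly good candidate for this fix: copying one may heap-allocate a duplicate of the captured state, so a by-value parameter pays that cost on every call. A standalone illustration (not part of the diff):

    #include <cstdint>
    #include <functional>

    // the const reference avoids copying the callable (and anything it
    // captured) each time this wrapper itself is called
    int64_t run_n(int64_t iterations, const std::function<int64_t(void)> & fn) {
        int64_t total = 0;
        for (int64_t i = 0; i < iterations; i++) {
            total += fn();
        }
        return total;
    }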