llama : check that all the tensor data is in the model file (#6885)

* llama : check that all the tensor data is in the model file

* also check for unsigned overflow
Author: slaren, 2024-04-25 15:23:47 +02:00 (committed by GitHub)
parent 51543729ff
commit 0ead1f1072

@@ -2999,9 +2999,13 @@ struct llama_model_loader {
         ggml_tensor * tensor;
 
-        llama_tensor_weight(uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+        llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
             const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
             offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
+
+            if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
+                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
+            }
         }
     };
 
     std::vector<llama_tensor_weight> weights;
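The check is two-sided on purpose: `offs + ggml_nbytes(tensor)` is an unsigned sum, so on overflow it wraps around to a value smaller than `offs`, and the first comparison catches the wrap that the `> file->size` test alone would miss. A minimal standalone sketch of the same idiom (the function and parameter names are illustrative, not llama.cpp API):

#include <cstdint>
#include <stdexcept>

// Illustrative stand-in for the check added above; not part of llama.cpp.
static void check_tensor_bounds(uint64_t offs, uint64_t nbytes, uint64_t file_size) {
    // On unsigned overflow the sum wraps and becomes smaller than offs,
    // so `offs + nbytes < offs` detects the wrap before the range test.
    if (offs + nbytes < offs || offs + nbytes > file_size) {
        throw std::runtime_error("tensor data is not within the file bounds");
    }
}

int main() {
    check_tensor_bounds(0, 100, 1024);                 // ok: 100 bytes fit in 1024
    try {
        check_tensor_bounds(UINT64_MAX - 8, 64, 1024); // sum wraps around
    } catch (const std::runtime_error &) { /* expected */ }
    try {
        check_tensor_bounds(2000, 100, 1024);          // past end of file
    } catch (const std::runtime_error &) { /* expected */ }
}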
@@ -3040,15 +3044,15 @@ struct llama_model_loader {
         get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
         llm_kv = LLM_KV(llm_arch_from_string(arch_name));
 
+        files.emplace_back(new llama_file(fname.c_str(), "rb"));
+        contexts.emplace_back(ctx);
+
         // Save tensors data offset of the main file.
         // For subsidiary files, `meta` tensor data offset must not be used,
         // so we build a unified tensors index for weights.
         for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-            weights.emplace_back(0, cur->name, meta, cur);
+            weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
         }
 
-        files.emplace_back(new llama_file(fname.c_str(), "rb"));
-        contexts.emplace_back(ctx);
-
         uint16_t n_split = 0;
         get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);
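The `files.emplace_back`/`contexts.emplace_back` calls move above the loop because the new `llama_tensor_weight` constructor dereferences the file pointer immediately: `files.back().get()` must already refer to the freshly opened main file. A compressed sketch of that ordering dependency, using simplified stand-in types rather than the real llama.cpp declarations:

#include <cstddef>
#include <memory>
#include <vector>

// Simplified stand-ins; the real llama_file also owns an open file handle.
struct llama_file { size_t size = 0; };
struct weight_ref { const llama_file * file = nullptr; };

int main() {
    std::vector<std::unique_ptr<llama_file>> files;
    std::vector<weight_ref> weights;

    // 1. Register the file first...
    files.emplace_back(new llama_file{1024});

    // 2. ...so files.back().get() is valid when each weight captures it
    //    and bounds-checks its tensor against file->size.
    weights.push_back({files.back().get()});

    return weights.back().file->size == 1024 ? 0 : 1;
}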
@@ -3082,13 +3086,14 @@ struct llama_model_loader {
                 throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
             }
 
-            // Save tensors data offset info of the shard.
-            for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-                weights.emplace_back(idx, cur->name, ctx_gguf, cur);
-            }
             files.emplace_back(new llama_file(split_path, "rb"));
             contexts.emplace_back(ctx);
 
+            // Save tensors data offset info of the shard.
+            for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
+                weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
+            }
+
             gguf_free(ctx_gguf);
         }