mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 03:14:35 +00:00
gguf : enforce that tensor names are unique (#6905)
* do not allow adding a duplicated tensor name
* do not allow duplicated tensors while reading gguf
* typo
* throw exception inside llama_model_loader

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
parent
ce023f6f2f
commit
7bb36ccf91
12
ggml.c
12
ggml.c
@ -20819,6 +20819,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
// TODO: return an error instead of crashing with GGML_ASSERT
|
// TODO: return an error instead of crashing with GGML_ASSERT
|
||||||
gguf_tensor_info_sanitize(info);
|
gguf_tensor_info_sanitize(info);
|
||||||
|
|
||||||
|
// make sure there is no duplicated tensor names
|
||||||
|
for (uint64_t j = 0; j < i; ++j) {
|
||||||
|
if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
|
||||||
|
fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
|
||||||
|
ok = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
@ -21355,6 +21363,10 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
|||||||
void gguf_add_tensor(
|
void gguf_add_tensor(
|
||||||
struct gguf_context * ctx,
|
struct gguf_context * ctx,
|
||||||
const struct ggml_tensor * tensor) {
|
const struct ggml_tensor * tensor) {
|
||||||
|
if (gguf_find_tensor(ctx, tensor->name) != -1) {
|
||||||
|
GGML_ASSERT(false && "duplicated tensor name");
|
||||||
|
}
|
||||||
|
|
||||||
const int idx = ctx->header.n_tensors;
|
const int idx = ctx->header.n_tensors;
|
||||||
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
|
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
|
||||||
|
|
||||||
|
@ -234,8 +234,14 @@ class GGUFReader:
|
|||||||
|
|
||||||
def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
|
def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
|
||||||
tensors = []
|
tensors = []
|
||||||
|
tensor_names = set() # keep track of name to prevent duplicated tensors
|
||||||
for field in fields:
|
for field in fields:
|
||||||
_name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
|
_name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
|
||||||
|
# check if there's any tensor having same name already in the list
|
||||||
|
tensor_name = str(bytes(name_data), encoding = 'utf-8')
|
||||||
|
if tensor_name in tensor_names:
|
||||||
|
raise ValueError(f'Found duplicated tensor with name {tensor_name}')
|
||||||
|
tensor_names.add(tensor_name)
|
||||||
ggml_type = GGMLQuantizationType(raw_dtype[0])
|
ggml_type = GGMLQuantizationType(raw_dtype[0])
|
||||||
n_elems = np.prod(dims)
|
n_elems = np.prod(dims)
|
||||||
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
|
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
|
||||||
@ -267,7 +273,7 @@ class GGUFReader:
|
|||||||
item_count = n_bytes
|
item_count = n_bytes
|
||||||
item_type = np.uint8
|
item_type = np.uint8
|
||||||
tensors.append(ReaderTensor(
|
tensors.append(ReaderTensor(
|
||||||
name = str(bytes(name_data), encoding = 'utf-8'),
|
name = tensor_name,
|
||||||
tensor_type = ggml_type,
|
tensor_type = ggml_type,
|
||||||
shape = dims,
|
shape = dims,
|
||||||
n_elements = n_elems,
|
n_elements = n_elems,
|
||||||
|
@ -63,6 +63,7 @@ class GGUFWriter:
|
|||||||
self.kv_data_count = 0
|
self.kv_data_count = 0
|
||||||
self.ti_data = bytearray()
|
self.ti_data = bytearray()
|
||||||
self.ti_data_count = 0
|
self.ti_data_count = 0
|
||||||
|
self.ti_names = set()
|
||||||
self.use_temp_file = use_temp_file
|
self.use_temp_file = use_temp_file
|
||||||
self.temp_file = None
|
self.temp_file = None
|
||||||
self.tensors = []
|
self.tensors = []
|
||||||
@ -197,6 +198,10 @@ class GGUFWriter:
|
|||||||
if self.state is not WriterState.EMPTY:
|
if self.state is not WriterState.EMPTY:
|
||||||
raise ValueError(f'Expected output file to be empty, got {self.state}')
|
raise ValueError(f'Expected output file to be empty, got {self.state}')
|
||||||
|
|
||||||
|
if name in self.ti_names:
|
||||||
|
raise ValueError(f'Duplicated tensor name {name}')
|
||||||
|
self.ti_names.add(name)
|
||||||
|
|
||||||
encoded_name = name.encode("utf8")
|
encoded_name = name.encode("utf8")
|
||||||
self.ti_data += self._pack("Q", len(encoded_name))
|
self.ti_data += self._pack("Q", len(encoded_name))
|
||||||
self.ti_data += encoded_name
|
self.ti_data += encoded_name
|
||||||
|
@ -3120,9 +3120,17 @@ struct llama_model_loader {
|
|||||||
|
|
||||||
fver = (enum llama_fver) gguf_get_version(meta);
|
fver = (enum llama_fver) gguf_get_version(meta);
|
||||||
|
|
||||||
|
std::set<std::string> tensor_names;
|
||||||
for (auto & w : weights) {
|
for (auto & w : weights) {
|
||||||
n_elements += ggml_nelements(w.tensor);
|
n_elements += ggml_nelements(w.tensor);
|
||||||
n_bytes += ggml_nbytes(w.tensor);
|
n_bytes += ggml_nbytes(w.tensor);
|
||||||
|
// make sure there is no duplicated tensor names
|
||||||
|
const std::string name(w.tensor->name);
|
||||||
|
auto found = tensor_names.find(name);
|
||||||
|
if (found != tensor_names.end()) {
|
||||||
|
throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", w.tensor->name));
|
||||||
|
}
|
||||||
|
tensor_names.insert(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
|
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
|
||||||
|
Loading…
Reference in New Issue
Block a user