mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
gguf : support big endian platform (#3552)
* check whether the platform is s390x; if yes, do not import immintrin.h * support s390x big endian * support --bigendian option for s390x 1. verified with baichuan7b-chat with float 16 on s390x 2. verified with baichuan7b-chat 3. verified with chinese-alpaca-2-13b-f16 * update format based on editor-config checker result * Update convert-baichuan-hf-to-gguf.py * 1. check in ggml.c whether the endianness does not match 2. update GGUF version 3. change get_pack_prefix to a property 4. update information log * always use "GGUF" as the beginning of a GGUF file * Compare "GGUF" with file header char by char 1. Set GGUF_MAGIC to the "GGUF" string instead of an int value 2. Compare "GGUF" char by char to ensure its byte order 3. Move byte-swap code from convert.py to gguf.py write_tensor_data --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
a0edf73bda
commit
8cf19d60dc
@ -76,6 +76,7 @@ def parse_args() -> argparse.Namespace:
|
|||||||
"ftype", type=int, choices=[0, 1], default=1, nargs='?',
|
"ftype", type=int, choices=[0, 1], default=1, nargs='?',
|
||||||
help="output format - use 0 for float32, 1 for float16",
|
help="output format - use 0 for float32, 1 for float16",
|
||||||
)
|
)
|
||||||
|
parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
@ -86,6 +87,11 @@ if not dir_model.is_dir():
|
|||||||
print(f'Error: {args.model} is not a directory', file = sys.stderr)
|
print(f'Error: {args.model} is not a directory', file = sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
endianess = gguf.GGUFEndian.LITTLE
|
||||||
|
if args.bigendian:
|
||||||
|
endianess = gguf.GGUFEndian.BIG
|
||||||
|
endianess_str = "Big Endian" if args.bigendian else "Little Endian"
|
||||||
|
print(f"gguf: Conversion Endianess {endianess}")
|
||||||
# possible tensor data types
|
# possible tensor data types
|
||||||
# ftype == 0 -> float32
|
# ftype == 0 -> float32
|
||||||
# ftype == 1 -> float16
|
# ftype == 1 -> float16
|
||||||
@ -113,7 +119,7 @@ if hparams["architectures"][0] != "BaichuanForCausalLM":
|
|||||||
num_parts = count_model_parts(dir_model)
|
num_parts = count_model_parts(dir_model)
|
||||||
print(f"num_parts:{num_parts}\n")
|
print(f"num_parts:{num_parts}\n")
|
||||||
ARCH=gguf.MODEL_ARCH.BAICHUAN
|
ARCH=gguf.MODEL_ARCH.BAICHUAN
|
||||||
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
|
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
|
||||||
|
|
||||||
print("gguf: get model metadata")
|
print("gguf: get model metadata")
|
||||||
|
|
||||||
|
20
convert.py
20
convert.py
@ -803,8 +803,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
|
|||||||
|
|
||||||
|
|
||||||
class OutputFile:
|
class OutputFile:
|
||||||
def __init__(self, fname_out: Path) -> None:
|
def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
|
||||||
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
|
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
|
||||||
|
|
||||||
def add_meta_arch(self, params: Params) -> None:
|
def add_meta_arch(self, params: Params) -> None:
|
||||||
name = "LLaMA"
|
name = "LLaMA"
|
||||||
@ -875,10 +875,10 @@ class OutputFile:
|
|||||||
self.gguf.close()
|
self.gguf.close()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab) -> None:
|
def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
|
||||||
check_vocab_size(params, vocab)
|
check_vocab_size(params, vocab)
|
||||||
|
|
||||||
of = OutputFile(fname_out)
|
of = OutputFile(fname_out, endianess=endianess)
|
||||||
|
|
||||||
# meta data
|
# meta data
|
||||||
of.add_meta_arch(params)
|
of.add_meta_arch(params)
|
||||||
@ -903,10 +903,10 @@ class OutputFile:
|
|||||||
return dt.quantize(arr)
|
return dt.quantize(arr)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY) -> None:
|
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
|
||||||
check_vocab_size(params, vocab)
|
check_vocab_size(params, vocab)
|
||||||
|
|
||||||
of = OutputFile(fname_out)
|
of = OutputFile(fname_out, endianess=endianess)
|
||||||
|
|
||||||
# meta data
|
# meta data
|
||||||
of.add_meta_arch(params)
|
of.add_meta_arch(params)
|
||||||
@ -1123,8 +1123,9 @@ def main(args_in: list[str] | None = None) -> None:
|
|||||||
parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
|
parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
|
||||||
parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
|
parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
|
||||||
parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
|
parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
|
||||||
args = parser.parse_args(args_in)
|
parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
|
||||||
|
|
||||||
|
args = parser.parse_args(args_in)
|
||||||
if args.dump_single:
|
if args.dump_single:
|
||||||
model_plus = lazy_load_file(args.model)
|
model_plus = lazy_load_file(args.model)
|
||||||
do_dump_model(model_plus)
|
do_dump_model(model_plus)
|
||||||
@ -1138,6 +1139,9 @@ def main(args_in: list[str] | None = None) -> None:
|
|||||||
if args.dump:
|
if args.dump:
|
||||||
do_dump_model(model_plus)
|
do_dump_model(model_plus)
|
||||||
return
|
return
|
||||||
|
endianess = gguf.GGUFEndian.LITTLE
|
||||||
|
if args.bigendian:
|
||||||
|
endianess = gguf.GGUFEndian.BIG
|
||||||
|
|
||||||
params = Params.load(model_plus)
|
params = Params.load(model_plus)
|
||||||
if params.n_ctx == -1:
|
if params.n_ctx == -1:
|
||||||
@ -1185,7 +1189,7 @@ def main(args_in: list[str] | None = None) -> None:
|
|||||||
params.ftype = ftype
|
params.ftype = ftype
|
||||||
print(f"Writing {outfile}, format {ftype}")
|
print(f"Writing {outfile}, format {ftype}")
|
||||||
|
|
||||||
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency)
|
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
|
||||||
print(f"Wrote {outfile}")
|
print(f"Wrote {outfile}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -536,7 +536,7 @@ static bool is_ggml_file(const char * filename) {
|
|||||||
if (file.size < 4) {
|
if (file.size < 4) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
uint32_t magic = file.read_u32();
|
std::string magic = file.read_string(4);
|
||||||
return magic == GGUF_MAGIC;
|
return magic == GGUF_MAGIC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
15
ggml.c
15
ggml.c
@ -20845,7 +20845,7 @@ struct gguf_kv {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct gguf_header {
|
struct gguf_header {
|
||||||
uint32_t magic;
|
char magic[4];
|
||||||
uint32_t version;
|
uint32_t version;
|
||||||
uint64_t n_tensors; // GGUFv2
|
uint64_t n_tensors; // GGUFv2
|
||||||
uint64_t n_kv; // GGUFv2
|
uint64_t n_kv; // GGUFv2
|
||||||
@ -20915,7 +20915,7 @@ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset)
|
|||||||
struct gguf_context * gguf_init_empty(void) {
|
struct gguf_context * gguf_init_empty(void) {
|
||||||
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
||||||
|
|
||||||
ctx->header.magic = GGUF_MAGIC;
|
memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
|
||||||
ctx->header.version = GGUF_VERSION;
|
ctx->header.version = GGUF_VERSION;
|
||||||
ctx->header.n_tensors = 0;
|
ctx->header.n_tensors = 0;
|
||||||
ctx->header.n_kv = 0;
|
ctx->header.n_kv = 0;
|
||||||
@ -20941,18 +20941,20 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
// offset from start of file
|
// offset from start of file
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
|
|
||||||
uint32_t magic = 0;
|
char magic[4];
|
||||||
|
|
||||||
// check the magic before making allocations
|
// check the magic before making allocations
|
||||||
{
|
{
|
||||||
gguf_fread_el(file, &magic, sizeof(magic), &offset);
|
gguf_fread_el(file, &magic, sizeof(magic), &offset);
|
||||||
|
|
||||||
if (magic != GGUF_MAGIC) {
|
for (uint32_t i = 0; i < sizeof(magic); i++) {
|
||||||
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
|
if (magic[i] != GGUF_MAGIC[i]) {
|
||||||
|
fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
|
|
||||||
@ -20960,7 +20962,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
|
|
||||||
// read the header
|
// read the header
|
||||||
{
|
{
|
||||||
ctx->header.magic = magic;
|
strncpy(ctx->header.magic, magic, 4);
|
||||||
|
|
||||||
|
|
||||||
ctx->kv = NULL;
|
ctx->kv = NULL;
|
||||||
ctx->infos = NULL;
|
ctx->infos = NULL;
|
||||||
|
5
ggml.h
5
ggml.h
@ -231,8 +231,9 @@
|
|||||||
#define GGML_EXIT_SUCCESS 0
|
#define GGML_EXIT_SUCCESS 0
|
||||||
#define GGML_EXIT_ABORTED 1
|
#define GGML_EXIT_ABORTED 1
|
||||||
|
|
||||||
#define GGUF_MAGIC 0x46554747 // "GGUF"
|
#define GGUF_MAGIC "GGUF"
|
||||||
#define GGUF_VERSION 2
|
|
||||||
|
#define GGUF_VERSION 3
|
||||||
|
|
||||||
#define GGUF_DEFAULT_ALIGNMENT 32
|
#define GGUF_DEFAULT_ALIGNMENT 32
|
||||||
|
|
||||||
|
@ -19,9 +19,10 @@ import numpy as np
|
|||||||
#
|
#
|
||||||
|
|
||||||
GGUF_MAGIC = 0x46554747
|
GGUF_MAGIC = 0x46554747
|
||||||
GGUF_VERSION = 2
|
GGUF_VERSION = 3
|
||||||
GGUF_DEFAULT_ALIGNMENT = 32
|
GGUF_DEFAULT_ALIGNMENT = 32
|
||||||
|
|
||||||
|
|
||||||
# general
|
# general
|
||||||
KEY_GENERAL_ARCHITECTURE = "general.architecture"
|
KEY_GENERAL_ARCHITECTURE = "general.architecture"
|
||||||
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
|
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
|
||||||
@ -597,6 +598,10 @@ class GGMLQuantizationType(IntEnum):
|
|||||||
Q6_K = 14
|
Q6_K = 14
|
||||||
Q8_K = 15
|
Q8_K = 15
|
||||||
|
|
||||||
|
class GGUFEndian(IntEnum):
|
||||||
|
LITTLE = 0
|
||||||
|
BIG = 1
|
||||||
|
|
||||||
|
|
||||||
class GGUFValueType(IntEnum):
|
class GGUFValueType(IntEnum):
|
||||||
UINT8 = 0
|
UINT8 = 0
|
||||||
@ -644,18 +649,41 @@ class GGUFWriter:
|
|||||||
temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
|
temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
|
||||||
tensors: list[tuple[np.ndarray[Any, Any], int]]
|
tensors: list[tuple[np.ndarray[Any, Any], int]]
|
||||||
|
|
||||||
def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
|
@property
|
||||||
|
def pack_prefix(self):
|
||||||
|
if self.endianess==GGUFEndian.LITTLE:
|
||||||
|
return "<"
|
||||||
|
else:
|
||||||
|
return ">"
|
||||||
|
|
||||||
|
def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True, endianess=GGUFEndian.LITTLE):
|
||||||
self.fout = open(path, "wb")
|
self.fout = open(path, "wb")
|
||||||
self.arch = arch
|
self.arch = arch
|
||||||
|
self.endianess = endianess
|
||||||
|
self._simple_value_packing = {
|
||||||
|
GGUFValueType.UINT8: f"{self.pack_prefix}B",
|
||||||
|
GGUFValueType.INT8: f"{self.pack_prefix}b",
|
||||||
|
GGUFValueType.UINT16: f"{self.pack_prefix}H",
|
||||||
|
GGUFValueType.INT16: f"{self.pack_prefix}h",
|
||||||
|
GGUFValueType.UINT32: f"{self.pack_prefix}I",
|
||||||
|
GGUFValueType.INT32: f"{self.pack_prefix}i",
|
||||||
|
GGUFValueType.FLOAT32: f"{self.pack_prefix}f",
|
||||||
|
GGUFValueType.UINT64: f"{self.pack_prefix}Q",
|
||||||
|
GGUFValueType.INT64: f"{self.pack_prefix}q",
|
||||||
|
GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
|
||||||
|
GGUFValueType.BOOL: "?" ,
|
||||||
|
}
|
||||||
self.add_architecture()
|
self.add_architecture()
|
||||||
self.use_temp_file = use_temp_file
|
self.use_temp_file = use_temp_file
|
||||||
self.tensors = []
|
self.tensors = []
|
||||||
|
endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
|
||||||
|
print(f"This gguf file is for {endianess_str} only")
|
||||||
|
|
||||||
def write_header_to_file(self):
|
def write_header_to_file(self):
|
||||||
self.fout.write(struct.pack("<I", GGUF_MAGIC))
|
self.fout.write(struct.pack("<I", GGUF_MAGIC))
|
||||||
self.fout.write(struct.pack("<I", GGUF_VERSION))
|
self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
|
||||||
self.fout.write(struct.pack("<Q", self.ti_data_count))
|
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
|
||||||
self.fout.write(struct.pack("<Q", self.kv_data_count))
|
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
|
||||||
self.flush()
|
self.flush()
|
||||||
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
|
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
|
||||||
|
|
||||||
@ -727,25 +755,12 @@ class GGUFWriter:
|
|||||||
self.add_key(key)
|
self.add_key(key)
|
||||||
self.add_val(val, GGUFValueType.ARRAY)
|
self.add_val(val, GGUFValueType.ARRAY)
|
||||||
|
|
||||||
_simple_value_packing = {
|
|
||||||
GGUFValueType.UINT8: "<B",
|
|
||||||
GGUFValueType.INT8: "<b",
|
|
||||||
GGUFValueType.UINT16: "<H",
|
|
||||||
GGUFValueType.INT16: "<h",
|
|
||||||
GGUFValueType.UINT32: "<I",
|
|
||||||
GGUFValueType.INT32: "<i",
|
|
||||||
GGUFValueType.FLOAT32: "<f",
|
|
||||||
GGUFValueType.UINT64: "<Q",
|
|
||||||
GGUFValueType.INT64: "<q",
|
|
||||||
GGUFValueType.FLOAT64: "<d",
|
|
||||||
GGUFValueType.BOOL: "?" ,
|
|
||||||
}
|
|
||||||
def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
|
def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
|
||||||
if vtype is None:
|
if vtype is None:
|
||||||
vtype = GGUFValueType.get_type(val)
|
vtype = GGUFValueType.get_type(val)
|
||||||
|
|
||||||
if add_vtype:
|
if add_vtype:
|
||||||
self.kv_data += struct.pack("<I", vtype)
|
self.kv_data += struct.pack(f"{self.pack_prefix}I", vtype)
|
||||||
self.kv_data_count += 1
|
self.kv_data_count += 1
|
||||||
|
|
||||||
pack_fmt = self._simple_value_packing.get(vtype)
|
pack_fmt = self._simple_value_packing.get(vtype)
|
||||||
@ -753,14 +768,14 @@ class GGUFWriter:
|
|||||||
self.kv_data += struct.pack(pack_fmt, val)
|
self.kv_data += struct.pack(pack_fmt, val)
|
||||||
elif vtype == GGUFValueType.STRING:
|
elif vtype == GGUFValueType.STRING:
|
||||||
encoded_val = val.encode("utf8") if isinstance(val, str) else val
|
encoded_val = val.encode("utf8") if isinstance(val, str) else val
|
||||||
self.kv_data += struct.pack("<Q", len(encoded_val))
|
self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_val))
|
||||||
self.kv_data += encoded_val
|
self.kv_data += encoded_val
|
||||||
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
|
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
|
||||||
ltype = GGUFValueType.get_type(val[0])
|
ltype = GGUFValueType.get_type(val[0])
|
||||||
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
|
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
|
||||||
raise ValueError("All items in a GGUF array should be of the same type")
|
raise ValueError("All items in a GGUF array should be of the same type")
|
||||||
self.kv_data += struct.pack("<I", ltype)
|
self.kv_data += struct.pack(f"{self.pack_prefix}I", ltype)
|
||||||
self.kv_data += struct.pack("<Q", len(val))
|
self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(val))
|
||||||
for item in val:
|
for item in val:
|
||||||
self.add_val(item, add_vtype=False)
|
self.add_val(item, add_vtype=False)
|
||||||
else:
|
else:
|
||||||
@ -774,22 +789,24 @@ class GGUFWriter:
|
|||||||
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
||||||
|
|
||||||
encoded_name = name.encode("utf8")
|
encoded_name = name.encode("utf8")
|
||||||
self.ti_data += struct.pack("<Q", len(encoded_name))
|
self.ti_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_name))
|
||||||
self.ti_data += encoded_name
|
self.ti_data += encoded_name
|
||||||
n_dims = len(tensor_shape)
|
n_dims = len(tensor_shape)
|
||||||
self.ti_data += struct.pack("<I", n_dims)
|
self.ti_data += struct.pack(f"{self.pack_prefix}I", n_dims)
|
||||||
for i in range(n_dims):
|
for i in range(n_dims):
|
||||||
self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
|
self.ti_data += struct.pack(f"{self.pack_prefix}Q", tensor_shape[n_dims - 1 - i])
|
||||||
if raw_dtype is None:
|
if raw_dtype is None:
|
||||||
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
|
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
|
||||||
else:
|
else:
|
||||||
dtype = raw_dtype
|
dtype = raw_dtype
|
||||||
self.ti_data += struct.pack("<I", dtype)
|
self.ti_data += struct.pack(f"{self.pack_prefix}I", dtype)
|
||||||
self.ti_data += struct.pack("<Q", self.offset_tensor)
|
self.ti_data += struct.pack(f"{self.pack_prefix}Q", self.offset_tensor)
|
||||||
self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
|
self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
|
||||||
self.ti_data_count += 1
|
self.ti_data_count += 1
|
||||||
|
|
||||||
def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
|
def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
|
||||||
|
if self.endianess == GGUFEndian.BIG:
|
||||||
|
tensor.byteswap(inplace=True)
|
||||||
if self.use_temp_file and self.temp_file is None:
|
if self.use_temp_file and self.temp_file is None:
|
||||||
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
|
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
|
||||||
fp.seek(0)
|
fp.seek(0)
|
||||||
@ -815,6 +832,8 @@ class GGUFWriter:
|
|||||||
fp.write(bytes([0] * pad))
|
fp.write(bytes([0] * pad))
|
||||||
|
|
||||||
def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
|
def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
|
||||||
|
if self.endianess==GGUFEndian.BIG:
|
||||||
|
tensor.byteswap(inplace=True)
|
||||||
self.write_padding(self.fout, self.fout.tell())
|
self.write_padding(self.fout, self.fout.tell())
|
||||||
tensor.tofile(self.fout)
|
tensor.tofile(self.fout)
|
||||||
self.write_padding(self.fout, tensor.nbytes)
|
self.write_padding(self.fout, tensor.nbytes)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "gguf"
|
name = "gguf"
|
||||||
version = "0.4.4"
|
version = "0.4.5"
|
||||||
description = "Write ML models in GGUF for GGML"
|
description = "Write ML models in GGUF for GGML"
|
||||||
authors = ["GGML <ggml@ggml.ai>"]
|
authors = ["GGML <ggml@ggml.ai>"]
|
||||||
packages = [
|
packages = [
|
||||||
|
@ -46,7 +46,7 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
|
|||||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#else
|
#else
|
||||||
#if !defined(__riscv)
|
#if !defined(__riscv) && !defined(__s390__)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -4,7 +4,9 @@
|
|||||||
|
|
||||||
#undef NDEBUG
|
#undef NDEBUG
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#if !defined(__riscv) && !defined(__s390__)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
Loading…
Reference in New Issue
Block a user