mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
py : cleanup the code
- use f-strings where possible
- drop the explicit encoding argument from str.encode()/bytes.decode() calls, since "utf-8" is already the default
This commit is contained in:
parent
9733104be5
commit
cbef542879
@ -27,9 +27,9 @@ def read_tokens(fin, vocab_size):
|
|||||||
text_len = struct.unpack("i", fin.read(4))[0]
|
text_len = struct.unpack("i", fin.read(4))[0]
|
||||||
text_bytes = fin.read(text_len)
|
text_bytes = fin.read(text_len)
|
||||||
try:
|
try:
|
||||||
text = text_bytes.decode("utf-8")
|
text = text_bytes.decode()
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
text = text_bytes.decode("utf-8", "replace")
|
text = text_bytes.decode(errors="replace")
|
||||||
score = struct.unpack("f", fin.read(4))[0]
|
score = struct.unpack("f", fin.read(4))[0]
|
||||||
tokens.append((text, score))
|
tokens.append((text, score))
|
||||||
return tokens
|
return tokens
|
||||||
@ -82,7 +82,7 @@ def read_variables(fin):
|
|||||||
|
|
||||||
shape = tuple(struct.unpack("i" * n_dims, fin.read(4 * n_dims)))
|
shape = tuple(struct.unpack("i" * n_dims, fin.read(4 * n_dims)))
|
||||||
shape = shape[::-1]
|
shape = shape[::-1]
|
||||||
name = fin.read(name_length).decode("utf-8")
|
name = fin.read(name_length).decode()
|
||||||
|
|
||||||
# ensure tensor data is aligned
|
# ensure tensor data is aligned
|
||||||
tensor_data_offset = fin.tell()
|
tensor_data_offset = fin.tell()
|
||||||
@ -207,11 +207,11 @@ AI: Hello! How can I assist you today?
|
|||||||
print(ctx.rstrip("\n"))
|
print(ctx.rstrip("\n"))
|
||||||
while True:
|
while True:
|
||||||
print("-" * 60)
|
print("-" * 60)
|
||||||
prompt = input(f"User: ")
|
prompt = input("User: ")
|
||||||
if ctx != "":
|
if ctx != "":
|
||||||
ctx = ctx + "User: " + prompt + "\n"
|
ctx = f"{ctx}User: {prompt}\n"
|
||||||
else:
|
else:
|
||||||
ctx = prompt + "\nAI:"
|
ctx = f"{prompt}\nAI:"
|
||||||
|
|
||||||
ctx = (ctx[-1920:]) if len(ctx) >= 2048 else ctx
|
ctx = (ctx[-1920:]) if len(ctx) >= 2048 else ctx
|
||||||
|
|
||||||
@ -236,7 +236,7 @@ AI: Hello! How can I assist you today?
|
|||||||
)
|
)
|
||||||
s = generation_output.sequences[0]
|
s = generation_output.sequences[0]
|
||||||
decoded = tokenizer.decode(s)
|
decoded = tokenizer.decode(s)
|
||||||
ctx = decoded + "\n"
|
ctx = f"{decoded}\n"
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -49,7 +49,7 @@ def write_header(f_out, header):
|
|||||||
def write_tokens(fout, tokenizer):
|
def write_tokens(fout, tokenizer):
|
||||||
for i in range(tokenizer.vocab_size()):
|
for i in range(tokenizer.vocab_size()):
|
||||||
if tokenizer.is_unknown(i):
|
if tokenizer.is_unknown(i):
|
||||||
text = " \u2047 ".encode("utf-8")
|
text = " \u2047 ".encode()
|
||||||
elif tokenizer.is_control(i):
|
elif tokenizer.is_control(i):
|
||||||
text = b""
|
text = b""
|
||||||
elif tokenizer.is_byte(i):
|
elif tokenizer.is_byte(i):
|
||||||
@ -60,13 +60,13 @@ def write_tokens(fout, tokenizer):
|
|||||||
byte_value = int(piece[3:-1], 16)
|
byte_value = int(piece[3:-1], 16)
|
||||||
text = struct.pack("B", byte_value)
|
text = struct.pack("B", byte_value)
|
||||||
else:
|
else:
|
||||||
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
|
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode()
|
||||||
fout.write(struct.pack("i", len(text)))
|
fout.write(struct.pack("i", len(text)))
|
||||||
fout.write(text)
|
fout.write(text)
|
||||||
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
||||||
|
|
||||||
# TODO: GPT4All - add extra <pad> token
|
# TODO: GPT4All - add extra <pad> token
|
||||||
text = "<pad>".encode("utf-8")
|
text = "<pad>".encode()
|
||||||
fout.write(struct.pack("i", len(text)))
|
fout.write(struct.pack("i", len(text)))
|
||||||
fout.write(text)
|
fout.write(text)
|
||||||
fout.write(struct.pack("f", 0.0))
|
fout.write(struct.pack("f", 0.0))
|
||||||
|
@ -50,7 +50,7 @@ fout.write(struct.pack("i", 4))
|
|||||||
# This loop unchanged from convert-pth-to-ggml.py:
|
# This loop unchanged from convert-pth-to-ggml.py:
|
||||||
for i in range(tokenizer.vocab_size()):
|
for i in range(tokenizer.vocab_size()):
|
||||||
if tokenizer.is_unknown(i):
|
if tokenizer.is_unknown(i):
|
||||||
text = " \u2047 ".encode("utf-8")
|
text = " \u2047 ".encode()
|
||||||
elif tokenizer.is_control(i):
|
elif tokenizer.is_control(i):
|
||||||
text = b""
|
text = b""
|
||||||
elif tokenizer.is_byte(i):
|
elif tokenizer.is_byte(i):
|
||||||
@ -61,13 +61,13 @@ for i in range(tokenizer.vocab_size()):
|
|||||||
byte_value = int(piece[3:-1], 16)
|
byte_value = int(piece[3:-1], 16)
|
||||||
text = struct.pack("B", byte_value)
|
text = struct.pack("B", byte_value)
|
||||||
else:
|
else:
|
||||||
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
|
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode()
|
||||||
fout.write(struct.pack("i", len(text)))
|
fout.write(struct.pack("i", len(text)))
|
||||||
fout.write(text)
|
fout.write(text)
|
||||||
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
||||||
|
|
||||||
def write_header(shape, dst_name, ftype_cur):
|
def write_header(shape, dst_name, ftype_cur):
|
||||||
sname = dst_name.encode('utf-8')
|
sname = dst_name.encode()
|
||||||
fout.write(struct.pack("iii", len(shape), len(sname), ftype_cur))
|
fout.write(struct.pack("iii", len(shape), len(sname), ftype_cur))
|
||||||
fout.write(struct.pack("i" * len(shape), *shape[::-1]))
|
fout.write(struct.pack("i" * len(shape), *shape[::-1]))
|
||||||
fout.write(sname)
|
fout.write(sname)
|
||||||
@ -80,7 +80,7 @@ def write_header(shape, dst_name, ftype_cur):
|
|||||||
def convert_non_q4(src_name, dst_name):
|
def convert_non_q4(src_name, dst_name):
|
||||||
v = model[src_name]
|
v = model[src_name]
|
||||||
shape = v.shape
|
shape = v.shape
|
||||||
print("Processing non-Q4 variable: " + src_name + " with shape: ", shape, " and type: ", v.dtype)
|
print(f"Processing non-Q4 variable: {src_name} with shape: {shape} and type: {v.dtype}")
|
||||||
if len(shape) == 1:
|
if len(shape) == 1:
|
||||||
print(" Converting to float32")
|
print(" Converting to float32")
|
||||||
v = v.to(torch.float32)
|
v = v.to(torch.float32)
|
||||||
@ -105,7 +105,7 @@ def convert_q4(src_name, dst_name, permute=False):
|
|||||||
# Each int32 item is actually 8 int4 items packed together, and it's transposed.
|
# Each int32 item is actually 8 int4 items packed together, and it's transposed.
|
||||||
shape = (qweight.shape[0], qweight.shape[1] * 8)
|
shape = (qweight.shape[0], qweight.shape[1] * 8)
|
||||||
|
|
||||||
print("Processing Q4 variable: " + src_name + " with shape: ", shape)
|
print(f"Processing Q4 variable: {src_name} with shape: {shape}")
|
||||||
|
|
||||||
# The output format has the int4 weights in groups of 32 rather than 8.
|
# The output format has the int4 weights in groups of 32 rather than 8.
|
||||||
# It looks like this:
|
# It looks like this:
|
||||||
@ -168,5 +168,5 @@ for i in range(n_layer):
|
|||||||
|
|
||||||
fout.close()
|
fout.close()
|
||||||
|
|
||||||
print("Done. Output file: " + fname_out)
|
print(f"Done. Output file: {fname_out}")
|
||||||
print("")
|
print()
|
||||||
|
@ -120,7 +120,7 @@ def write_header(fout, hparams, ftype):
|
|||||||
def write_tokens(fout, tokenizer):
|
def write_tokens(fout, tokenizer):
|
||||||
for i in range(tokenizer.vocab_size()):
|
for i in range(tokenizer.vocab_size()):
|
||||||
if tokenizer.is_unknown(i):
|
if tokenizer.is_unknown(i):
|
||||||
text = " \u2047 ".encode("utf-8")
|
text = " \u2047 ".encode()
|
||||||
elif tokenizer.is_control(i):
|
elif tokenizer.is_control(i):
|
||||||
text = b""
|
text = b""
|
||||||
elif tokenizer.is_byte(i):
|
elif tokenizer.is_byte(i):
|
||||||
@ -131,7 +131,7 @@ def write_tokens(fout, tokenizer):
|
|||||||
byte_value = int(piece[3:-1], 16)
|
byte_value = int(piece[3:-1], 16)
|
||||||
text = struct.pack("B", byte_value)
|
text = struct.pack("B", byte_value)
|
||||||
else:
|
else:
|
||||||
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
|
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode()
|
||||||
fout.write(struct.pack("i", len(text)))
|
fout.write(struct.pack("i", len(text)))
|
||||||
fout.write(text)
|
fout.write(text)
|
||||||
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
||||||
@ -191,7 +191,7 @@ def process_and_write_variables(fout, model, ftype, part_id, n_parts):
|
|||||||
fullshape = list(partshape)
|
fullshape = list(partshape)
|
||||||
if n_dims > 1:
|
if n_dims > 1:
|
||||||
fullshape[split_dim] *= n_parts
|
fullshape[split_dim] *= n_parts
|
||||||
sname = name.encode('utf-8')
|
sname = name.encode()
|
||||||
fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
|
fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
|
||||||
for dim in reversed(fullshape):
|
for dim in reversed(fullshape):
|
||||||
fout.write(struct.pack("i", dim))
|
fout.write(struct.pack("i", dim))
|
||||||
|
@ -44,7 +44,7 @@ def write_header(f_out, header):
|
|||||||
def write_tokens(fout, tokenizer):
|
def write_tokens(fout, tokenizer):
|
||||||
for i in range(tokenizer.vocab_size()):
|
for i in range(tokenizer.vocab_size()):
|
||||||
if tokenizer.is_unknown(i):
|
if tokenizer.is_unknown(i):
|
||||||
text = " \u2047 ".encode("utf-8")
|
text = " \u2047 ".encode()
|
||||||
elif tokenizer.is_control(i):
|
elif tokenizer.is_control(i):
|
||||||
text = b""
|
text = b""
|
||||||
elif tokenizer.is_byte(i):
|
elif tokenizer.is_byte(i):
|
||||||
@ -55,7 +55,7 @@ def write_tokens(fout, tokenizer):
|
|||||||
byte_value = int(piece[3:-1], 16)
|
byte_value = int(piece[3:-1], 16)
|
||||||
text = struct.pack("B", byte_value)
|
text = struct.pack("B", byte_value)
|
||||||
else:
|
else:
|
||||||
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
|
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode()
|
||||||
fout.write(struct.pack("i", len(text)))
|
fout.write(struct.pack("i", len(text)))
|
||||||
fout.write(text)
|
fout.write(text)
|
||||||
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
fout.write(struct.pack("f", tokenizer.get_score(i)))
|
||||||
|
@ -272,13 +272,11 @@ def main():
|
|||||||
tokens = read_tokens(fin, hparams)
|
tokens = read_tokens(fin, hparams)
|
||||||
|
|
||||||
if hparams['magic'] == 0x67676a74: # ggjt
|
if hparams['magic'] == 0x67676a74: # ggjt
|
||||||
print("%s: input ggml has already been converted to 'ggjt' magic\n" %
|
print(f"{args.fin_path}: input ggml has already been converted to 'ggjt' magic\n")
|
||||||
(args.fin_path))
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if hparams['magic'] != 0x67676d66: # ggmf
|
if hparams['magic'] != 0x67676d66: # ggmf
|
||||||
print("%s: input ggml file doesn't have expected 'ggmf' magic: %#x\n" %
|
print(f"{args.fin_path}: input ggml file doesn't have expected 'ggmf' magic: {hparams['magic']:#x}\n")
|
||||||
(args.fin_path, hparams['magic']))
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
hparams['magic'] = 0x67676a74 # ggjt
|
hparams['magic'] = 0x67676a74 # ggjt
|
||||||
@ -286,7 +284,7 @@ def main():
|
|||||||
# count number of multipart files by convention
|
# count number of multipart files by convention
|
||||||
n_parts = 1
|
n_parts = 1
|
||||||
while True:
|
while True:
|
||||||
if os.path.exists("%s.%d" % (args.fin_path, n_parts)):
|
if os.path.exists(f"{args.fin_path}.{n_parts}"):
|
||||||
n_parts += 1
|
n_parts += 1
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
@ -302,7 +300,7 @@ def main():
|
|||||||
print(f"Processing part {part_id+1} of {n_parts}\n")
|
print(f"Processing part {part_id+1} of {n_parts}\n")
|
||||||
fin_path = args.fin_path
|
fin_path = args.fin_path
|
||||||
if part_id > 0:
|
if part_id > 0:
|
||||||
fin_path += ".%d" % (part_id)
|
fin_path += f".{part_id}"
|
||||||
with open(fin_path, "rb") as fin:
|
with open(fin_path, "rb") as fin:
|
||||||
read_tokens(fin, read_hparams(fin))
|
read_tokens(fin, read_hparams(fin))
|
||||||
copy_tensors(fin, fout, part_id, n_parts)
|
copy_tensors(fin, fout, part_id, n_parts)
|
||||||
|
Loading…
Reference in New Issue
Block a user