mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
py : open merges file as 'utf-8' (#4566)
Otherwise, on Windows, converting bling-phi-2-v0 (<https://huggingface.co/llmware/bling-phi-2-v0>) via convert-hf-to-gguf.py fails with the following error:
```
Traceback (most recent call last):
  File "C:\Users\User\git\gguf\convert-hf-to-gguf.py", line 1061, in <module>
    model_instance.set_vocab()
  File "C:\Users\User\git\gguf\convert-hf-to-gguf.py", line 52, in set_vocab
    self._set_vocab_gpt2()
  File "C:\Users\User\git\gguf\convert-hf-to-gguf.py", line 264, in _set_vocab_gpt2
    special_vocab = gguf.SpecialVocab(dir_model, load_merges=True)
  File "C:\Users\User\git\gguf\gguf\vocab.py", line 33, in __init__
    self._load(Path(path))
  File "C:\Users\User\git\gguf\gguf\vocab.py", line 81, in _load
    self._try_load_merges_txt(path)
  File "C:\Users\User\git\gguf\gguf\vocab.py", line 95, in _try_load_merges_txt
    for line in fp:
  File "C:\Users\User\miniconda3\envs\gguf\lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 1415: character maps to <undefined>
```
This commit is contained in:
parent
66f35a2f48
commit
880e352277
@ -84,7 +84,7 @@ class SpecialVocab:
|
|||||||
merges_file = path / 'merges.txt'
|
merges_file = path / 'merges.txt'
|
||||||
if not merges_file.is_file():
|
if not merges_file.is_file():
|
||||||
return False
|
return False
|
||||||
with open(merges_file, 'r') as fp:
|
with open(merges_file, 'r', encoding = 'utf-8') as fp:
|
||||||
first_line = next(fp, '').strip()
|
first_line = next(fp, '').strip()
|
||||||
if not first_line.startswith('#'):
|
if not first_line.startswith('#'):
|
||||||
fp.seek(0)
|
fp.seek(0)
|
||||||
|
Loading…
Reference in New Issue
Block a user