mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
convert : skip unaccessible HF repos (#7210)
This commit is contained in:
parent
988631335a
commit
3292733f95
@ -145,8 +145,17 @@ for model in models:
|
|||||||
if tokt == TOKENIZER_TYPE.SPM:
|
if tokt == TOKENIZER_TYPE.SPM:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Skip if the tokenizer folder does not exist or there are other download issues previously
|
||||||
|
if not os.path.exists(f"models/tokenizers/{name}"):
|
||||||
|
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
|
||||||
|
continue
|
||||||
|
|
||||||
# create the tokenizer
|
# create the tokenizer
|
||||||
|
try:
|
||||||
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
|
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
|
||||||
|
continue # Skip to the next model if the tokenizer can't be loaded
|
||||||
|
|
||||||
chktok = tokenizer.encode(chktxt)
|
chktok = tokenizer.encode(chktxt)
|
||||||
chkhsh = sha256(str(chktok).encode()).hexdigest()
|
chkhsh = sha256(str(chktok).encode()).hexdigest()
|
||||||
@ -287,8 +296,17 @@ for model in models:
|
|||||||
name = model["name"]
|
name = model["name"]
|
||||||
tokt = model["tokt"]
|
tokt = model["tokt"]
|
||||||
|
|
||||||
|
# Skip if the tokenizer folder does not exist or there are other download issues previously
|
||||||
|
if not os.path.exists(f"models/tokenizers/{name}"):
|
||||||
|
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
|
||||||
|
continue
|
||||||
|
|
||||||
# create the tokenizer
|
# create the tokenizer
|
||||||
|
try:
|
||||||
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
|
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
|
||||||
|
continue # Skip this model and continue with the next one in the loop
|
||||||
|
|
||||||
with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
|
with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
|
||||||
for text in tests:
|
for text in tests:
|
||||||
|
Loading…
Reference in New Issue
Block a user