diff --git a/ci/run.sh b/ci/run.sh
index f03fd72ca..9703b77ce 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -287,7 +287,7 @@ function gg_run_open_llama_7b_v2 {
     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
 
-    python3 ../examples/convert-legacy-llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+    python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
 
     model_f16="${path_models}/ggml-model-f16.gguf"
     model_q8_0="${path_models}/ggml-model-q8_0.gguf"
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 7261c1736..d1422c413 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1161,7 +1161,7 @@ class FalconModel(Model):
         # So we rearrange them here,, so that we have n_head query weights
         # followed by n_head_kv key weights followed by n_head_kv value weights,
         # in contiguous fashion.
-        # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert_hf_to_gguf.py
+        # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-gguf.py
 
         if "query_key_value" in name:
             n_head = self.find_hparam(["num_attention_heads", "n_head"])
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 344d034fc..e4165ae2d 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -15,7 +15,7 @@
 # - Add a new model to the "models" list
 # - Run the script with your huggingface token:
 #
-#   python3 convert_hf_to_gguf-update.py <huggingface_token>
+#   python3 convert_hf_to_gguf_update.py <huggingface_token>
 #
 # - Copy-paste the generated get_vocab_base_pre() function into convert_hf_to_gguf.py
 # - Update llama.cpp with the new pre-tokenizer if necessary
@@ -37,7 +37,7 @@ from enum import IntEnum, auto
 from transformers import AutoTokenizer
 
 logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger("convert_hf_to_gguf-update")
+logger = logging.getLogger("convert_hf_to_gguf_update")
 
 sess = requests.Session()
 
@@ -56,10 +56,10 @@ if len(sys.argv) == 2:
     token = sys.argv[1]
     if not token.startswith("hf_"):
         logger.info("Huggingface token seems invalid")
-        logger.info("Usage: python convert_hf_to_gguf-update.py <huggingface_token>")
+        logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
         sys.exit(1)
 else:
-    logger.info("Usage: python convert_hf_to_gguf-update.py <huggingface_token>")
+    logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
     sys.exit(1)
 
 # TODO: add models here, base models preferred
@@ -201,7 +201,7 @@ src_func = f"""
 
     res = None
 
-    # NOTE: if you get an error here, you need to update the convert_hf_to_gguf-update.py script
+    # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
     #       or pull the latest version of the model from Huggingface
     #       don't edit the hashes manually!
 {src_ifs}
diff --git a/docs/HOWTO-add-model.md b/docs/HOWTO-add-model.md
index a2a816804..87093cedd 100644
--- a/docs/HOWTO-add-model.md
+++ b/docs/HOWTO-add-model.md
@@ -17,7 +17,7 @@ Also, it is important to check that the examples and main ggml backends (CUDA, M
 ### 1. Convert the model to GGUF
 
 This step is done in python with a `convert` script using the [gguf](https://pypi.org/project/gguf/) library.
-Depending on the model architecture, you can use either [convert_hf_to_gguf.py](../convert_hf_to_gguf.py) or [examples/convert-legacy-llama.py](../examples/convert-legacy-llama.py) (for `llama/llama2` models in `.pth` format).
+Depending on the model architecture, you can use either [convert_hf_to_gguf.py](../convert_hf_to_gguf.py) or [examples/convert_legacy_llama.py](../examples/convert_legacy_llama.py) (for `llama/llama2` models in `.pth` format).
 The convert script reads the model configuration, tokenizer, tensor names+data and converts them to GGUF metadata and tensors.
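
Not part of the patch: a minimal usage sketch of the renamed scripts, assuming placeholder model paths and a placeholder token; the only flag shown is --outfile, the same one used in ci/run.sh above.

    # Hugging Face checkpoint directory -> GGUF (placeholder paths)
    python3 convert_hf_to_gguf.py /path/to/hf-model --outfile /path/to/ggml-model-f16.gguf

    # legacy llama/llama2 checkpoints in .pth format -> GGUF, via the relocated legacy script
    python3 examples/convert_legacy_llama.py /path/to/llama-model --outfile /path/to/ggml-model-f16.gguf

    # regenerate the pre-tokenizer hashes after adding a model ("hf_..." is a placeholder token)
    python3 convert_hf_to_gguf_update.py hf_...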