diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 108c822cf..258536237 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -63,6 +63,7 @@ class Model: model_name: str | None metadata_override: Path | None dir_model_card: Path + is_lora: bool # subclasses should define this! model_arch: gguf.MODEL_ARCH @@ -70,7 +71,7 @@ class Model: def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False, use_temp_file: bool = False, eager: bool = False, metadata_override: Path | None = None, model_name: str | None = None, - split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): + split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False, is_lora: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") @@ -92,6 +93,7 @@ class Model: self.metadata_override = metadata_override self.model_name = model_name self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py + self.is_lora = is_lora # true if model is used inside convert_lora_to_gguf.py # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type if self.ftype == gguf.LlamaFileType.GUESSED: @@ -1593,7 +1595,8 @@ class LlamaModel(Model): smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor) rope_factors.append(1 / ((1 - smooth) / factor + smooth)) - self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32)) + if not self.is_lora: + self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32)) super().prepare_tensors() @@ -2140,8 +2143,9 @@ class Phi3MiniModel(Model): if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2: raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}') - self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32)) - self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32)) + if not self.is_lora: + self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32)) + self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32)) @Model.register("PlamoForCausalLM") @@ -3839,7 +3843,8 @@ class ExaoneModel(Model): smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor) rope_factors.append(1 / ((1 - smooth) / factor + smooth)) - self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32)) + if not self.is_lora: + self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32)) super().prepare_tensors() diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py index a88d0d4a9..ddd347a2a 100755 --- a/convert_lora_to_gguf.py +++ b/convert_lora_to_gguf.py @@ -386,6 +386,7 @@ if __name__ == '__main__': dry_run=args.dry_run, dir_lora_model=dir_lora, lora_alpha=alpha, + is_lora=True, ) logger.info("Exporting model...") diff --git a/tests/test-lora-conversion-inference.sh b/tests/test-lora-conversion-inference.sh index c05c8e187..fe90ce0d1 100755 --- a/tests/test-lora-conversion-inference.sh +++ b/tests/test-lora-conversion-inference.sh @@ -14,7 +14,7 @@ MODELS_REPO_URL=https://huggingface.co/ggml-org/$MODELS_REPO # Clone the Hugging Face repository if the directory does not exist if [ ! -d "$MODELS_REPO" ]; then echo "Cloning the Hugging Face repository..." - git clone $MODELS_REPO_URL + git clone $MODELS_REPO_URL --depth 1 else echo "Repository already exists. Skipping clone." fi