# llama.cpp/convert_lora_to_gguf.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import annotations

from dataclasses import dataclass
import logging
import argparse
import os
import sys
from pathlib import Path
from types import EllipsisType
from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Sequence, SupportsIndex, cast

import torch

if TYPE_CHECKING:
    from torch import Tensor

if 'NO_LOCAL_GGUF' not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

# reuse model definitions from convert_hf_to_gguf.py
from convert_hf_to_gguf import Model

logger = logging.getLogger("lora-to-gguf")


@dataclass
class PartialLoraTensor:
    """Holder for the two halves of a LoRA pair.

    Both fields default to ``None`` so that an instance can be created as soon
    as one half is known and completed once the other half is assigned.
    """
    # the "lora_A" tensor of the pair, or None while it has not been assigned
    A: Tensor | None = None
    # the "lora_B" tensor of the pair, or None while it has not been assigned
    B: Tensor | None = None


# magic to support tensor shape modifications and splitting
class LoraTorchTensor:
    _lora_A: Tensor
    _lora_B: Tensor
    _rank: int

    def __init__(self, A: Tensor, B: Tensor):
        assert len(A.shape) == len(B.shape)
        if A.dtype != B.dtype:
            A = A.to(torch.float32)
            B = B.to(torch.float32)
        self._lora_A = A
        self._lora_B = B
        assert self._lora_A.shape[-2] == self._lora_B.shape[-1]
        self._rank = self._lora_B.shape[-1]

    def __getitem__(
        self,
        indices: (
            SupportsIndex
            | slice
            | tuple[SupportsIndex | slice | EllipsisType | Tensor, ...]
        ),
    ) -> LoraTorchTensor:
        shape = self.shape
        if isinstance(indices, (SupportsIndex, slice)):
            if len(shape) > 2:
                return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
            else:
                raise NotImplementedError
        elif isinstance(indices, tuple):
            assert len(indices) > 0
            if isinstance(indices[-1], EllipsisType):
                return self[indices[:-1]]
            # expand ellipsis
            indices = tuple(
                u
                for v in (
                    (
                        (slice(None, None) for _ in range(len(indices) - 1))
                        if isinstance(i, EllipsisType)
                        else (i,)
                    )
                    for i in indices
                )
                for u in v
            )

            if len(indices) < len(shape):
                indices = (*indices, *(slice(None, None) for _ in range(len(indices), len(shape))))

            # TODO: make sure this is correct
            # lora_A has a shape which looks like (..., 1, 1, rank, self.shape[-1])
            indices_A = (
                *(
                    0 if isinstance(i, SupportsIndex) else slice(None, None)
                    for i in indices[:-2]
                ),
                slice(None, None),
                indices[-1],
            )
            indices_B = indices[:-1]
            return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])
        else:
            raise NotImplementedError

    @property
    def dtype(self) -> torch.dtype:
        assert self._lora_A.dtype == self._lora_B.dtype
        return self._lora_A.dtype

    @property
    def shape(self) -> tuple[int, ...]:
        return (*self._lora_B.shape[:-1], self._lora_A.shape[-1])

    def size(self, dim=None):
        assert dim is None
        return self.shape

    def reshape(self, *shape: int | tuple[int]) -> LoraTorchTensor:
        if isinstance(shape[0], tuple):
            new_shape: tuple[int] = shape[0]
        else:
            new_shape = cast(tuple[int], shape)
        orig_shape = self.shape
        if new_shape[-1] != orig_shape[-1]:
            raise NotImplementedError
        return LoraTorchTensor(
            self._lora_A.reshape((*(1 for _ in new_shape[:-2]), *self._lora_A.shape[-2:])),
            self._lora_B.reshape((*new_shape[:-1], self._rank)),
        )

    def reshape_as(self, other: Tensor) -> LoraTorchTensor:
        return self.reshape(*other.shape)

    def view(self, *size: int) -> LoraTorchTensor:
        return self.reshape(*size)

    def permute(self, *dims: int) -> LoraTorchTensor:
        shape = self.shape
        dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)
        if dims[-1] == -2 and dims[-2] == -1:
            return LoraTorchTensor(self._lora_B.permute(*dims), self._lora_A.permute(*dims))
        else:
            assert dims[-1] == -1
            assert all(dim == 1 for dim in self._lora_A.shape[:-2])
            return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))

    def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:
        shape = self.shape
        dims = [i for i in range(len(shape))]
        dims[dim0], dims[dim1] = dims[dim1], dims[dim0]
        return self.permute(*dims)

    def swapaxes(self, axis0: int, axis1: int) -> LoraTorchTensor:
        return self.transpose(axis0, axis1)

    def to(self, *args, **kwargs):
        return LoraTorchTensor(self._lora_A.to(*args, **kwargs), self._lora_B.to(*args, **kwargs))

    @classmethod
    def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
        del types  # unused

        if kwargs is None:
            kwargs = {}

        if func is torch.permute:
            return type(args[0]).permute(*args, **kwargs)
        elif func is torch.reshape:
            return type(args[0]).reshape(*args, **kwargs)
        elif func is torch.stack:
            assert isinstance(args[0], Sequence)
            dim = kwargs.get("dim", 0)
            assert dim == 0
            return LoraTorchTensor(
                torch.stack([a._lora_A for a in args[0]], dim),
                torch.stack([b._lora_B for b in args[0]], dim),
            )
        elif func is torch.cat:
            assert isinstance(args[0], Sequence)
            dim = kwargs.get("dim", 0)
            assert dim == 0
            if len(args[0][0].shape) > 2:
                return LoraTorchTensor(
                    torch.cat([a._lora_A for a in args[0]], dim),
                    torch.cat([b._lora_B for b in args[0]], dim),
                )
            else:
                return LoraTorchTensor(
                    args[0][0]._lora_A,  # TODO: is this correct? (can't cat over the rank)
                    torch.cat([b._lora_B for b in args[0]], dim),
                )
        else:
            raise NotImplementedError


def get_base_tensor_name(lora_tensor_name: str) -> str:
    """Derive the base weight name from the name of a LoRA tensor.

    Every occurrence of ``base_model.model.`` is dropped and the
    ``.lora_A.weight`` / ``.lora_B.weight`` markers are turned into
    ``.weight``.  Names without any of these substrings are returned as-is.
    """
    substitutions = (
        ("base_model.model.", ""),
        (".lora_A.weight", ".weight"),
        (".lora_B.weight", ".weight"),
    )
    result = lora_tensor_name
    # apply the substitutions one after the other, in this order
    for needle, replacement in substitutions:
        result = result.replace(needle, replacement)
    return result


def parse_args() -> argparse.Namespace:
    """Parse the command line of the LoRA conversion script.

    Returns:
        The namespace with ``outfile``, ``outtype``, ``bigendian``,
        ``verbose``, ``base`` (required) and the positional ``lora_path``.
    """
    # (flags, add_argument keyword arguments), registered in this order
    argument_table = [
        (("--outfile",), {
            "type": Path,
            "help": "path to write to; default: based on input. {ftype} will be replaced by the outtype.",
        }),
        (("--outtype",), {
            "type": str,
            "choices": ["f32", "f16", "bf16", "q8_0"],
            "default": "f16",
            "help": "output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0",
        }),
        (("--bigendian",), {
            "action": "store_true",
            "help": "model is executed on big endian machine",
        }),
        (("--verbose",), {
            "action": "store_true",
            "help": "increase output verbosity",
        }),
        (("--base",), {
            "type": Path,
            "required": True,
            "help": "directory containing base model file",
        }),
        (("lora_path",), {
            "type": Path,
            "help": "directory containing LoRA adapter file",
        }),
    ]

    cli = argparse.ArgumentParser(
        description="Convert a huggingface PEFT LoRA adapter to a GGML compatible file")
    for flags, options in argument_table:
        cli.add_argument(*flags, **options)

    return cli.parse_args()


if __name__ == '__main__':
    # Script entry point: load the LoRA adapter tensors, wrap each A/B pair so
    # that the base model's tensor conversion logic can run on them, and write
    # the result as a GGUF file.
    args = parse_args()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    # map the --outtype choice to the corresponding gguf file type
    ftype_map: dict[str, gguf.LlamaFileType] = {
        "f32": gguf.LlamaFileType.ALL_F32,
        "f16": gguf.LlamaFileType.MOSTLY_F16,
        "bf16": gguf.LlamaFileType.MOSTLY_BF16,
        "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
    }
    ftype = ftype_map[args.outtype]

    dir_base_model = args.base
    dir_lora = args.lora_path
    # NOTE: input_json is currently unused (see the commented-out load below)
    input_json = os.path.join(dir_lora, "adapter_config.json")
    input_model = os.path.join(dir_lora, "adapter_model.safetensors")
    if args.outfile is not None:
        fname_out = args.outfile
    else:
        # output in the same directory as the model by default
        fname_out = dir_lora / 'ggml-lora-{ftype}.gguf'

    # prefer the safetensors adapter; fall back to the pickled .bin otherwise
    if os.path.exists(input_model):
        # lazy import load_file only if lora is in safetensors format.
        from safetensors.torch import load_file

        lora_model = load_file(input_model, device="cpu")
    else:
        input_model = os.path.join(dir_lora, "adapter_model.bin")
        lora_model = torch.load(input_model, map_location="cpu", weights_only=True)

    # load base model
    logger.info(f"Loading base model: {dir_base_model.name}")
    hparams = Model.load_hparams(dir_base_model)
    with torch.inference_mode():
        try:
            model_class = Model.from_model_architecture(hparams["architectures"][0])
        except NotImplementedError:
            logger.error(f"Model {hparams['architectures'][0]} is not supported")
            sys.exit(1)

        class LoraModel(model_class):
            """Base-model converter fed with LoRA pairs instead of model weights."""
            model_arch = model_class.model_arch

            def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                """Yield ``(base tensor name, LoraTorchTensor)`` for every LoRA pair.

                Tensors whose name contains ``.base_layer.weight`` are skipped.
                The script exits with status 1 on any other tensor that is
                neither a lora_A nor a lora_B, and on a pair which is missing
                one of its halves.
                """
                tensor_map: dict[str, PartialLoraTensor] = {}

                for name, tensor in lora_model.items():
                    base_name = get_base_tensor_name(name)
                    is_lora_a = ".lora_A.weight" in name
                    is_lora_b = ".lora_B.weight" in name
                    if not is_lora_a and not is_lora_b:
                        if ".base_layer.weight" in name:
                            continue
                        logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
                        sys.exit(1)

                    # both halves of a pair map to the same base tensor name
                    partial = tensor_map.setdefault(base_name, PartialLoraTensor())
                    if is_lora_a:
                        partial.A = tensor
                    else:
                        partial.B = tensor

                for name, tensor in tensor_map.items():
                    # explicit check instead of assert: it names the offending
                    # tensor and is not stripped when running with -O
                    if tensor.A is None or tensor.B is None:
                        missing = "lora_A" if tensor.A is None else "lora_B"
                        logger.error(f"Incomplete LoRA pair for '{name}': missing {missing} tensor")
                        sys.exit(1)
                    yield (name, cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)))

            def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
                """Run the base model's tensor mapping and emit the two factors.

                Every tensor returned by the parent implementation must still
                be a ``LoraTorchTensor``; its factors are yielded under the
                mapped name with a ``.lora_a`` / ``.lora_b`` suffix.
                """
                dest = super().modify_tensors(data_torch, name, bid)
                for dest_name, dest_data in dest:
                    assert isinstance(dest_data, LoraTorchTensor)
                    # logger.info(f"{orig_name} --> {dest_name}")
                    yield (dest_name + ".lora_a", dest_data._lora_A)
                    yield (dest_name + ".lora_b", dest_data._lora_B)

        # NOTE(review): the trailing positional arguments (False, False, None)
        # follow the Model constructor in convert_hf_to_gguf - confirm their
        # meaning there before changing them
        model_instance = LoraModel(dir_base_model, ftype, fname_out, args.bigendian, False, False, None)
        logger.info("Set model parameters")
        model_instance.set_gguf_parameters()

        # adapter_config = json.load(input_json)
        model_instance.gguf_writer.add_string("training.type", "finetune_lora")

        model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
        logger.info("Exporting model...")
        model_instance.write()
        logger.info(f"Model successfully exported to {model_instance.fname_out}")
add convert script 2024-07-08 14:35:27 +00:00			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`

			`from __future__ import annotations`

convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`from dataclasses import dataclass`
add convert script 2024-07-08 14:35:27 +00:00			`import logging`
			`import argparse`
			`import os`
			`import sys`
			`from pathlib import Path`
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`from types import EllipsisType`
			`from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Sequence, SupportsIndex, cast`
add convert script 2024-07-08 14:35:27 +00:00
			`import torch`

			`if TYPE_CHECKING:`
			`from torch import Tensor`

			`if 'NO_LOCAL_GGUF' not in os.environ:`
			`sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))`
			`import gguf`

			`# reuse model definitions from convert_hf_to_gguf.py`
			`from convert_hf_to_gguf import Model`

			`logger = logging.getLogger("lora-to-gguf")`

fix ftype 2024-07-08 19:55:41 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`@dataclass`
			`class PartialLoraTensor:`
			`A: Tensor \| None = None`
			`B: Tensor \| None = None`


			`# magic to support tensor shape modifications and splitting`
			`class LoraTorchTensor:`
			`_lora_A: Tensor`
			`_lora_B: Tensor`
			`_rank: int`

			`def __init__(self, A: Tensor, B: Tensor):`
			`assert len(A.shape) == len(B.shape)`
			`if A.dtype != B.dtype:`
			`A = A.to(torch.float32)`
			`B = B.to(torch.float32)`
			`self._lora_A = A`
			`self._lora_B = B`
			`assert self._lora_A.shape[-2] == self._lora_B.shape[-1]`
			`self._rank = self._lora_B.shape[-1]`

			`def __getitem__(`
			`self,`
			`indices: (`
			`SupportsIndex`
			`\| slice`
			`\| tuple[SupportsIndex \| slice \| EllipsisType \| Tensor, ...]`
			`),`
			`) -> LoraTorchTensor:`
			`shape = self.shape`
			`if isinstance(indices, (SupportsIndex, slice)):`
			`if len(shape) > 2:`
			`return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])`
			`else:`
			`raise NotImplementedError`
			`elif isinstance(indices, tuple):`
			`assert len(indices) > 0`
			`if isinstance(indices[-1], EllipsisType):`
			`return self[indices[:-1]]`
			`# expand ellipsis`
			`indices = tuple(`
			`u`
			`for v in (`
			`(`
			`(slice(None, None) for _ in range(len(indices) - 1))`
			`if isinstance(i, EllipsisType)`
			`else (i,)`
			`)`
			`for i in indices`
			`)`
			`for u in v`
			`)`

			`if len(indices) < len(shape):`
			`indices = (indices, (slice(None, None) for _ in range(len(indices), len(shape))))`

			`# TODO: make sure this is correct`
			`# lora_A has a shape which looks like (..., 1, 1, rank, self.shape[-1])`
			`indices_A = (`
			`*(`
			`0 if isinstance(i, SupportsIndex) else slice(None, None)`
			`for i in indices[:-2]`
			`),`
			`slice(None, None),`
			`indices[-1],`
			`)`
			`indices_B = indices[:-1]`
			`return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])`
			`else:`
			`raise NotImplementedError`

			`@property`
			`def dtype(self) -> torch.dtype:`
			`assert self._lora_A.dtype == self._lora_B.dtype`
			`return self._lora_A.dtype`

			`@property`
			`def shape(self) -> tuple[int, ...]:`
			`return (*self._lora_B.shape[:-1], self._lora_A.shape[-1])`

			`def size(self, dim=None):`
			`assert dim is None`
			`return self.shape`

			`def reshape(self, *shape: int \| tuple[int]) -> LoraTorchTensor:`
			`if isinstance(shape[0], tuple):`
			`new_shape: tuple[int] = shape[0]`
			`else:`
			`new_shape = cast(tuple[int], shape)`
			`orig_shape = self.shape`
			`if new_shape[-1] != orig_shape[-1]:`
			`raise NotImplementedError`
			`return LoraTorchTensor(`
			`self._lora_A.reshape(((1 for _ in new_shape[:-2]), self._lora_A.shape[-2:])),`
			`self._lora_B.reshape((*new_shape[:-1], self._rank)),`
			`)`

			`def reshape_as(self, other: Tensor) -> LoraTorchTensor:`
			`return self.reshape(*other.shape)`

			`def view(self, *size: int) -> LoraTorchTensor:`
			`return self.reshape(*size)`

			`def permute(self, *dims: int) -> LoraTorchTensor:`
			`shape = self.shape`
			`dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)`
			`if dims[-1] == -2 and dims[-2] == -1:`
			`return LoraTorchTensor(self._lora_B.permute(dims), self._lora_A.permute(dims))`
			`else:`
			`assert dims[-1] == -1`
			`assert all(dim == 1 for dim in self._lora_A.shape[:-2])`
			`return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))`

			`def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:`
			`shape = self.shape`
			`dims = [i for i in range(len(shape))]`
			`dims[dim0], dims[dim1] = dims[dim1], dims[dim0]`
			`return self.permute(*dims)`

			`def swapaxes(self, axis0: int, axis1: int) -> LoraTorchTensor:`
			`return self.transpose(axis0, axis1)`

			`def to(self, args, *kwargs):`
			`return LoraTorchTensor(self._lora_A.to(args, kwargs), self._lora_B.to(args, **kwargs))`

			`@classmethod`
			`def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):`
			`del types # unused`

			`if kwargs is None:`
			`kwargs = {}`

			`if func is torch.permute:`
			`return type(args[0]).permute(args, *kwargs)`
			`elif func is torch.reshape:`
			`return type(args[0]).reshape(args, *kwargs)`
			`elif func is torch.stack:`
			`assert isinstance(args[0], Sequence)`
			`dim = kwargs.get("dim", 0)`
			`assert dim == 0`
			`return LoraTorchTensor(`
			`torch.stack([a._lora_A for a in args[0]], dim),`
			`torch.stack([b._lora_B for b in args[0]], dim),`
			`)`
			`elif func is torch.cat:`
			`assert isinstance(args[0], Sequence)`
			`dim = kwargs.get("dim", 0)`
			`assert dim == 0`
			`if len(args[0][0].shape) > 2:`
			`return LoraTorchTensor(`
			`torch.cat([a._lora_A for a in args[0]], dim),`
			`torch.cat([b._lora_B for b in args[0]], dim),`
			`)`
			`else:`
			`return LoraTorchTensor(`
			`args[0][0]._lora_A, # TODO: is this correct? (can't cat over the rank)`
			`torch.cat([b._lora_B for b in args[0]], dim),`
			`)`
			`else:`
			`raise NotImplementedError`


conversion: only allow selected models 2024-07-09 22:23:07 +00:00			`def get_base_tensor_name(lora_tensor_name: str) -> str:`
			`base_name = lora_tensor_name.replace("base_model.model.", "")`
			`base_name = base_name.replace(".lora_A.weight", ".weight")`
			`base_name = base_name.replace(".lora_B.weight", ".weight")`
			`return base_name`


add convert script 2024-07-08 14:35:27 +00:00			`def parse_args() -> argparse.Namespace:`
			`parser = argparse.ArgumentParser(`
fix ftype 2024-07-08 19:55:41 +00:00			`description="Convert a huggingface PEFT LoRA adapter to a GGML compatible file")`
add convert script 2024-07-08 14:35:27 +00:00			`parser.add_argument(`
			`"--outfile", type=Path,`
fix outfile 2024-07-08 20:05:35 +00:00			`help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",`
add convert script 2024-07-08 14:35:27 +00:00			`)`
			`parser.add_argument(`
fix ftype 2024-07-08 19:55:41 +00:00			`"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0"], default="f16",`
			`help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0",`
add convert script 2024-07-08 14:35:27 +00:00			`)`
			`parser.add_argument(`
			`"--bigendian", action="store_true",`
			`help="model is executed on big endian machine",`
			`)`
			`parser.add_argument(`
			`"--verbose", action="store_true",`
			`help="increase output verbosity",`
			`)`
			`parser.add_argument(`
			`"--base", type=Path, required=True,`
			`help="directory containing base model file",`
			`)`
			`parser.add_argument(`
			`"lora_path", type=Path,`
			`help="directory containing LoRA adapter file",`
			`)`

			`return parser.parse_args()`


			`if __name__ == '__main__':`
			`args = parse_args()`
			`logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)`

			`ftype_map: dict[str, gguf.LlamaFileType] = {`
			`"f32": gguf.LlamaFileType.ALL_F32,`
			`"f16": gguf.LlamaFileType.MOSTLY_F16,`
			`"bf16": gguf.LlamaFileType.MOSTLY_BF16,`
			`"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,`
			`}`
fix ftype 2024-07-08 19:55:41 +00:00			`ftype = ftype_map[args.outtype]`
add convert script 2024-07-08 14:35:27 +00:00
			`dir_base_model = args.base`
			`dir_lora = args.lora_path`
			`input_json = os.path.join(dir_lora, "adapter_config.json")`
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`input_model = os.path.join(dir_lora, "adapter_model.safetensors")`
add convert script 2024-07-08 14:35:27 +00:00			`if args.outfile is not None:`
			`fname_out = args.outfile`
			`else:`
			`# output in the same directory as the model by default`
fix outfile 2024-07-08 20:05:35 +00:00			`fname_out = dir_lora / 'ggml-lora-{ftype}.gguf'`
add convert script 2024-07-08 14:35:27 +00:00
			`if os.path.exists(input_model):`
			`# lazy import load_file only if lora is in safetensors format.`
			`from safetensors.torch import load_file`
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00
add convert script 2024-07-08 14:35:27 +00:00			`lora_model = load_file(input_model, device="cpu")`
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`else:`
			`input_model = os.path.join(dir_lora, "adapter_model.bin")`
			`lora_model = torch.load(input_model, map_location="cpu", weights_only=True)`
add convert script 2024-07-08 14:35:27 +00:00
			`# load base model`
			`logger.info(f"Loading base model: {dir_base_model.name}")`
			`hparams = Model.load_hparams(dir_base_model)`
			`with torch.inference_mode():`
			`try:`
			`model_class = Model.from_model_architecture(hparams["architectures"][0])`
			`except NotImplementedError:`
			`logger.error(f"Model {hparams['architectures'][0]} is not supported")`
			`sys.exit(1)`

convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`class LoraModel(model_class):`
			`model_arch = model_class.model_arch`
add convert script 2024-07-08 14:35:27 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`def get_tensors(self) -> Iterator[tuple[str, Tensor]]:`
			`tensor_map: dict[str, PartialLoraTensor] = {}`
add convert script 2024-07-08 14:35:27 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`for name, tensor in lora_model.items():`
			`base_name = get_base_tensor_name(name)`
			`is_lora_a = ".lora_A.weight" in name`
			`is_lora_b = ".lora_B.weight" in name`
			`if not is_lora_a and not is_lora_b:`
			`if ".base_layer.weight" in name:`
			`continue`
			`logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")`
			`sys.exit(1)`
add convert script 2024-07-08 14:35:27 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`if base_name in tensor_map:`
			`if is_lora_a:`
			`tensor_map[base_name].A = tensor`
			`else:`
			`tensor_map[base_name].B = tensor`
			`else:`
			`if is_lora_a:`
			`tensor_map[base_name] = PartialLoraTensor(A=tensor)`
			`else:`
			`tensor_map[base_name] = PartialLoraTensor(B=tensor)`
add convert script 2024-07-08 14:35:27 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`for name, tensor in tensor_map.items():`
			`assert tensor.A is not None`
			`assert tensor.B is not None`
			`yield (name, cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)))`
add f16 convert 2024-07-08 15:05:17 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`def modify_tensors(self, data_torch: Tensor, name: str, bid: int \| None) -> Iterable[tuple[str, Tensor]]:`
			`dest = super().modify_tensors(data_torch, name, bid)`
			`for dest_name, dest_data in dest:`
			`assert isinstance(dest_data, LoraTorchTensor)`
			`# logger.info(f"{orig_name} --> {dest_name}")`
			`yield (dest_name + ".lora_a", dest_data._lora_A)`
			`yield (dest_name + ".lora_b", dest_data._lora_B)`
conversion: only allow selected models 2024-07-09 22:23:07 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`model_instance = LoraModel(dir_base_model, ftype, fname_out, args.bigendian, False, False, None)`
			`logger.info("Set model parameters")`
			`model_instance.set_gguf_parameters()`
conversion: only allow selected models 2024-07-09 22:23:07 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`# adapter_config = json.load(input_json)`
			`model_instance.gguf_writer.add_string("training.type", "finetune_lora")`
fix ftype 2024-07-08 19:55:41 +00:00
convert_lora : MoE LoRA conversion support * convert_lora : prefer safetensors, similarly to convert_hf 2024-07-09 22:26:38 +00:00			`model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)`
			`logger.info("Exporting model...")`
			`model_instance.write()`
			`logger.info(f"Model successfully exported to {model_instance.fname_out}")`