py : add XLMRobertaForSequenceClassification [no ci]

2025-01-03 15:24:35 +00:00 · 2024-09-16 16:59:17 +03:00 · 2024-09-16 16:59:17 +03:00 · 3453e62bb9
commit 3453e62bb9
parent 1e43630218
3 changed files with 20 additions and 1 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@ -2598,7 +2598,7 @@ class NomicBertModel(BertModel):
        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
-@Model.register("XLMRobertaModel")
+@Model.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
    model_arch = gguf.MODEL_ARCH.BERT
@ -2701,6 +2701,11 @@ class XLMRobertaModel(BertModel):
            if self._position_offset is not None:
                data_torch = data_torch[self._position_offset:,:]
        # if name starts with "roberta.", remove the prefix
        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
        if name.startswith("roberta."):
            name = name[8:]
        return super().modify_tensors(data_torch, name, bid)
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@ -343,6 +343,8 @@ class MODEL_TENSOR(IntEnum):
    ENC_FFN_DOWN         = auto()
    ENC_FFN_UP           = auto()
    ENC_OUTPUT_NORM      = auto()
    CLS                  = auto() # classifier
    CLS_OUT              = auto() # classifier output projection
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@ -501,6 +503,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.ENC_FFN_DOWN:              "enc.blk.{bid}.ffn_down",
    MODEL_TENSOR.ENC_FFN_UP:                "enc.blk.{bid}.ffn_up",
    MODEL_TENSOR.ENC_OUTPUT_NORM:           "enc.output_norm",
    MODEL_TENSOR.CLS:                       "cls",
    MODEL_TENSOR.CLS_OUT:                   "cls.output",
 }
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@ -610,6 +614,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
        MODEL_TENSOR.LAYER_OUT_NORM,
        MODEL_TENSOR.CLS,
        MODEL_TENSOR.CLS_OUT,
    ],
    MODEL_ARCH.NOMIC_BERT: [
        MODEL_TENSOR.TOKEN_EMBD,
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@ -679,6 +679,14 @@ class TensorNameMap:
        MODEL_TENSOR.ENC_OUTPUT_NORM: (
            "encoder.final_layer_norm", # t5
        ),
        MODEL_TENSOR.CLS: (
            "classifier.dense", # roberta
        ),
        MODEL_TENSOR.CLS_OUT: (
            "classifier.out_proj", # roberta
        ),
    }
    # architecture-specific block mappings