py : add XLMRobertaForSequenceClassification [no ci]

This commit is contained in:
Georgi Gerganov 2024-09-16 16:59:17 +03:00
parent 1e43630218
commit 3453e62bb9
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
3 changed files with 20 additions and 1 deletion

View File

@@ -2598,7 +2598,7 @@ class NomicBertModel(BertModel):
self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"]) self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
@Model.register("XLMRobertaModel") @Model.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
class XLMRobertaModel(BertModel): class XLMRobertaModel(BertModel):
model_arch = gguf.MODEL_ARCH.BERT model_arch = gguf.MODEL_ARCH.BERT
@@ -2701,6 +2701,11 @@ class XLMRobertaModel(BertModel):
if self._position_offset is not None: if self._position_offset is not None:
data_torch = data_torch[self._position_offset:,:] data_torch = data_torch[self._position_offset:,:]
# if name starts with "roberta.", remove the prefix
# e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
if name.startswith("roberta."):
name = name[8:]
return super().modify_tensors(data_torch, name, bid) return super().modify_tensors(data_torch, name, bid)

View File

@@ -343,6 +343,8 @@ class MODEL_TENSOR(IntEnum):
ENC_FFN_DOWN = auto() ENC_FFN_DOWN = auto()
ENC_FFN_UP = auto() ENC_FFN_UP = auto()
ENC_OUTPUT_NORM = auto() ENC_OUTPUT_NORM = auto()
CLS = auto() # classifier
CLS_OUT = auto() # classifier output projection
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -501,6 +503,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down", MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up", MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm", MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
MODEL_TENSOR.CLS: "cls",
MODEL_TENSOR.CLS_OUT: "cls.output",
} }
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -610,6 +614,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP, MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.LAYER_OUT_NORM, MODEL_TENSOR.LAYER_OUT_NORM,
MODEL_TENSOR.CLS,
MODEL_TENSOR.CLS_OUT,
], ],
MODEL_ARCH.NOMIC_BERT: [ MODEL_ARCH.NOMIC_BERT: [
MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.TOKEN_EMBD,

View File

@@ -679,6 +679,14 @@ class TensorNameMap:
MODEL_TENSOR.ENC_OUTPUT_NORM: ( MODEL_TENSOR.ENC_OUTPUT_NORM: (
"encoder.final_layer_norm", # t5 "encoder.final_layer_norm", # t5
), ),
MODEL_TENSOR.CLS: (
"classifier.dense", # roberta
),
MODEL_TENSOR.CLS_OUT: (
"classifier.out_proj", # roberta
),
} }
# architecture-specific block mappings # architecture-specific block mappings