mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-03 15:24:35 +00:00
py : add XLMRobertaForSequenceClassification [no ci]
This commit is contained in:
parent
1e43630218
commit
3453e62bb9
@ -2598,7 +2598,7 @@ class NomicBertModel(BertModel):
|
|||||||
self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
|
self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
|
||||||
|
|
||||||
|
|
||||||
@Model.register("XLMRobertaModel")
|
@Model.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
|
||||||
class XLMRobertaModel(BertModel):
|
class XLMRobertaModel(BertModel):
|
||||||
model_arch = gguf.MODEL_ARCH.BERT
|
model_arch = gguf.MODEL_ARCH.BERT
|
||||||
|
|
||||||
@ -2701,6 +2701,11 @@ class XLMRobertaModel(BertModel):
|
|||||||
if self._position_offset is not None:
|
if self._position_offset is not None:
|
||||||
data_torch = data_torch[self._position_offset:,:]
|
data_torch = data_torch[self._position_offset:,:]
|
||||||
|
|
||||||
|
# if name starts with "roberta.", remove the prefix
|
||||||
|
# e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
|
||||||
|
if name.startswith("roberta."):
|
||||||
|
name = name[8:]
|
||||||
|
|
||||||
return super().modify_tensors(data_torch, name, bid)
|
return super().modify_tensors(data_torch, name, bid)
|
||||||
|
|
||||||
|
|
||||||
|
@ -343,6 +343,8 @@ class MODEL_TENSOR(IntEnum):
|
|||||||
ENC_FFN_DOWN = auto()
|
ENC_FFN_DOWN = auto()
|
||||||
ENC_FFN_UP = auto()
|
ENC_FFN_UP = auto()
|
||||||
ENC_OUTPUT_NORM = auto()
|
ENC_OUTPUT_NORM = auto()
|
||||||
|
CLS = auto() # classifier
|
||||||
|
CLS_OUT = auto() # classifier output projection
|
||||||
|
|
||||||
|
|
||||||
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
@ -501,6 +503,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
|||||||
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
|
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
|
||||||
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
|
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
|
||||||
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
|
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
|
||||||
|
MODEL_TENSOR.CLS: "cls",
|
||||||
|
MODEL_TENSOR.CLS_OUT: "cls.output",
|
||||||
}
|
}
|
||||||
|
|
||||||
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
@ -610,6 +614,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
MODEL_TENSOR.LAYER_OUT_NORM,
|
MODEL_TENSOR.LAYER_OUT_NORM,
|
||||||
|
MODEL_TENSOR.CLS,
|
||||||
|
MODEL_TENSOR.CLS_OUT,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.NOMIC_BERT: [
|
MODEL_ARCH.NOMIC_BERT: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
@ -679,6 +679,14 @@ class TensorNameMap:
|
|||||||
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
||||||
"encoder.final_layer_norm", # t5
|
"encoder.final_layer_norm", # t5
|
||||||
),
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.CLS: (
|
||||||
|
"classifier.dense", # roberta
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.CLS_OUT: (
|
||||||
|
"classifier.out_proj", # roberta
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
# architecture-specific block mappings
|
# architecture-specific block mappings
|
||||||
|
Loading…
Reference in New Issue
Block a user