mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 19:34:35 +00:00
tts : outetts-voc -> wavtokenizer-dec
This commit is contained in:
parent
f1b5b6b5a1
commit
985d59f5e5
@ -2032,9 +2032,9 @@ class Qwen2VLModel(Model):
|
|||||||
yield name, data
|
yield name, data
|
||||||
|
|
||||||
|
|
||||||
@Model.register("OuteTTSVocoder")
|
@Model.register("WavTokenizerDec")
|
||||||
class OuteTTSVocoderModel(Model):
|
class WavTokenizerDecModel(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.OUTETTS_VOC
|
model_arch = gguf.MODEL_ARCH.WAVTOKENIZER_DEC
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
del bid # unused
|
del bid # unused
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# convert the https://huggingface.co/novateur/WavTokenizer-large-speech-75token to HF format
|
# convert the https://huggingface.co/novateur/WavTokenizer-large-speech-75token to HF format
|
||||||
# the goal is to be able to reuse the convert_hf_to_gguf.py after that to create a GGUF file with the OuteTTSS vocoder
|
# the goal is to be able to reuse the convert_hf_to_gguf.py after that to create a GGUF file with the WavTokenizer decoder
|
||||||
#
|
#
|
||||||
# TODO: this script is LLM-generated and probably very inefficient and should be rewritten
|
# TODO: this script is LLM-generated and probably very inefficient and should be rewritten
|
||||||
|
|
||||||
@ -144,7 +144,7 @@ print(f"Metadata has been saved to {index_path}")
|
|||||||
|
|
||||||
config = {
|
config = {
|
||||||
"architectures": [
|
"architectures": [
|
||||||
"OuteTTSVocoder"
|
"WavTokenizerDec"
|
||||||
],
|
],
|
||||||
"hidden_size": 1282,
|
"hidden_size": 1282,
|
||||||
"vocab_size": 4096,
|
"vocab_size": 4096,
|
||||||
|
@ -209,59 +209,59 @@ class GGUFType:
|
|||||||
|
|
||||||
|
|
||||||
class MODEL_ARCH(IntEnum):
|
class MODEL_ARCH(IntEnum):
|
||||||
LLAMA = auto()
|
LLAMA = auto()
|
||||||
FALCON = auto()
|
FALCON = auto()
|
||||||
BAICHUAN = auto()
|
BAICHUAN = auto()
|
||||||
GROK = auto()
|
GROK = auto()
|
||||||
GPT2 = auto()
|
GPT2 = auto()
|
||||||
GPTJ = auto()
|
GPTJ = auto()
|
||||||
GPTNEOX = auto()
|
GPTNEOX = auto()
|
||||||
MPT = auto()
|
MPT = auto()
|
||||||
STARCODER = auto()
|
STARCODER = auto()
|
||||||
REFACT = auto()
|
REFACT = auto()
|
||||||
BERT = auto()
|
BERT = auto()
|
||||||
NOMIC_BERT = auto()
|
NOMIC_BERT = auto()
|
||||||
JINA_BERT_V2 = auto()
|
JINA_BERT_V2 = auto()
|
||||||
BLOOM = auto()
|
BLOOM = auto()
|
||||||
STABLELM = auto()
|
STABLELM = auto()
|
||||||
QWEN = auto()
|
QWEN = auto()
|
||||||
QWEN2 = auto()
|
QWEN2 = auto()
|
||||||
QWEN2MOE = auto()
|
QWEN2MOE = auto()
|
||||||
QWEN2VL = auto()
|
QWEN2VL = auto()
|
||||||
PHI2 = auto()
|
PHI2 = auto()
|
||||||
PHI3 = auto()
|
PHI3 = auto()
|
||||||
PLAMO = auto()
|
PLAMO = auto()
|
||||||
CODESHELL = auto()
|
CODESHELL = auto()
|
||||||
ORION = auto()
|
ORION = auto()
|
||||||
INTERNLM2 = auto()
|
INTERNLM2 = auto()
|
||||||
MINICPM = auto()
|
MINICPM = auto()
|
||||||
MINICPM3 = auto()
|
MINICPM3 = auto()
|
||||||
GEMMA = auto()
|
GEMMA = auto()
|
||||||
GEMMA2 = auto()
|
GEMMA2 = auto()
|
||||||
STARCODER2 = auto()
|
STARCODER2 = auto()
|
||||||
RWKV6 = auto()
|
RWKV6 = auto()
|
||||||
MAMBA = auto()
|
MAMBA = auto()
|
||||||
XVERSE = auto()
|
XVERSE = auto()
|
||||||
COMMAND_R = auto()
|
COMMAND_R = auto()
|
||||||
DBRX = auto()
|
DBRX = auto()
|
||||||
OLMO = auto()
|
OLMO = auto()
|
||||||
OLMO2 = auto()
|
OLMO2 = auto()
|
||||||
OLMOE = auto()
|
OLMOE = auto()
|
||||||
OPENELM = auto()
|
OPENELM = auto()
|
||||||
ARCTIC = auto()
|
ARCTIC = auto()
|
||||||
DEEPSEEK = auto()
|
DEEPSEEK = auto()
|
||||||
DEEPSEEK2 = auto()
|
DEEPSEEK2 = auto()
|
||||||
CHATGLM = auto()
|
CHATGLM = auto()
|
||||||
BITNET = auto()
|
BITNET = auto()
|
||||||
T5 = auto()
|
T5 = auto()
|
||||||
T5ENCODER = auto()
|
T5ENCODER = auto()
|
||||||
JAIS = auto()
|
JAIS = auto()
|
||||||
NEMOTRON = auto()
|
NEMOTRON = auto()
|
||||||
EXAONE = auto()
|
EXAONE = auto()
|
||||||
GRANITE = auto()
|
GRANITE = auto()
|
||||||
GRANITE_MOE = auto()
|
GRANITE_MOE = auto()
|
||||||
CHAMELEON = auto()
|
CHAMELEON = auto()
|
||||||
OUTETTS_VOC = auto()
|
WAVTOKENIZER_DEC = auto()
|
||||||
|
|
||||||
|
|
||||||
class MODEL_TENSOR(IntEnum):
|
class MODEL_TENSOR(IntEnum):
|
||||||
@ -390,59 +390,59 @@ class MODEL_TENSOR(IntEnum):
|
|||||||
|
|
||||||
|
|
||||||
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
MODEL_ARCH.LLAMA: "llama",
|
MODEL_ARCH.LLAMA: "llama",
|
||||||
MODEL_ARCH.FALCON: "falcon",
|
MODEL_ARCH.FALCON: "falcon",
|
||||||
MODEL_ARCH.BAICHUAN: "baichuan",
|
MODEL_ARCH.BAICHUAN: "baichuan",
|
||||||
MODEL_ARCH.GROK: "grok",
|
MODEL_ARCH.GROK: "grok",
|
||||||
MODEL_ARCH.GPT2: "gpt2",
|
MODEL_ARCH.GPT2: "gpt2",
|
||||||
MODEL_ARCH.GPTJ: "gptj",
|
MODEL_ARCH.GPTJ: "gptj",
|
||||||
MODEL_ARCH.GPTNEOX: "gptneox",
|
MODEL_ARCH.GPTNEOX: "gptneox",
|
||||||
MODEL_ARCH.MPT: "mpt",
|
MODEL_ARCH.MPT: "mpt",
|
||||||
MODEL_ARCH.STARCODER: "starcoder",
|
MODEL_ARCH.STARCODER: "starcoder",
|
||||||
MODEL_ARCH.REFACT: "refact",
|
MODEL_ARCH.REFACT: "refact",
|
||||||
MODEL_ARCH.BERT: "bert",
|
MODEL_ARCH.BERT: "bert",
|
||||||
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
|
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
|
||||||
MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
|
MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
|
||||||
MODEL_ARCH.BLOOM: "bloom",
|
MODEL_ARCH.BLOOM: "bloom",
|
||||||
MODEL_ARCH.STABLELM: "stablelm",
|
MODEL_ARCH.STABLELM: "stablelm",
|
||||||
MODEL_ARCH.QWEN: "qwen",
|
MODEL_ARCH.QWEN: "qwen",
|
||||||
MODEL_ARCH.QWEN2: "qwen2",
|
MODEL_ARCH.QWEN2: "qwen2",
|
||||||
MODEL_ARCH.QWEN2MOE: "qwen2moe",
|
MODEL_ARCH.QWEN2MOE: "qwen2moe",
|
||||||
MODEL_ARCH.QWEN2VL: "qwen2vl",
|
MODEL_ARCH.QWEN2VL: "qwen2vl",
|
||||||
MODEL_ARCH.PHI2: "phi2",
|
MODEL_ARCH.PHI2: "phi2",
|
||||||
MODEL_ARCH.PHI3: "phi3",
|
MODEL_ARCH.PHI3: "phi3",
|
||||||
MODEL_ARCH.PLAMO: "plamo",
|
MODEL_ARCH.PLAMO: "plamo",
|
||||||
MODEL_ARCH.CODESHELL: "codeshell",
|
MODEL_ARCH.CODESHELL: "codeshell",
|
||||||
MODEL_ARCH.ORION: "orion",
|
MODEL_ARCH.ORION: "orion",
|
||||||
MODEL_ARCH.INTERNLM2: "internlm2",
|
MODEL_ARCH.INTERNLM2: "internlm2",
|
||||||
MODEL_ARCH.MINICPM: "minicpm",
|
MODEL_ARCH.MINICPM: "minicpm",
|
||||||
MODEL_ARCH.MINICPM3: "minicpm3",
|
MODEL_ARCH.MINICPM3: "minicpm3",
|
||||||
MODEL_ARCH.GEMMA: "gemma",
|
MODEL_ARCH.GEMMA: "gemma",
|
||||||
MODEL_ARCH.GEMMA2: "gemma2",
|
MODEL_ARCH.GEMMA2: "gemma2",
|
||||||
MODEL_ARCH.STARCODER2: "starcoder2",
|
MODEL_ARCH.STARCODER2: "starcoder2",
|
||||||
MODEL_ARCH.RWKV6: "rwkv6",
|
MODEL_ARCH.RWKV6: "rwkv6",
|
||||||
MODEL_ARCH.MAMBA: "mamba",
|
MODEL_ARCH.MAMBA: "mamba",
|
||||||
MODEL_ARCH.XVERSE: "xverse",
|
MODEL_ARCH.XVERSE: "xverse",
|
||||||
MODEL_ARCH.COMMAND_R: "command-r",
|
MODEL_ARCH.COMMAND_R: "command-r",
|
||||||
MODEL_ARCH.DBRX: "dbrx",
|
MODEL_ARCH.DBRX: "dbrx",
|
||||||
MODEL_ARCH.OLMO: "olmo",
|
MODEL_ARCH.OLMO: "olmo",
|
||||||
MODEL_ARCH.OLMO2: "olmo2",
|
MODEL_ARCH.OLMO2: "olmo2",
|
||||||
MODEL_ARCH.OLMOE: "olmoe",
|
MODEL_ARCH.OLMOE: "olmoe",
|
||||||
MODEL_ARCH.OPENELM: "openelm",
|
MODEL_ARCH.OPENELM: "openelm",
|
||||||
MODEL_ARCH.ARCTIC: "arctic",
|
MODEL_ARCH.ARCTIC: "arctic",
|
||||||
MODEL_ARCH.DEEPSEEK: "deepseek",
|
MODEL_ARCH.DEEPSEEK: "deepseek",
|
||||||
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
||||||
MODEL_ARCH.CHATGLM: "chatglm",
|
MODEL_ARCH.CHATGLM: "chatglm",
|
||||||
MODEL_ARCH.BITNET: "bitnet",
|
MODEL_ARCH.BITNET: "bitnet",
|
||||||
MODEL_ARCH.T5: "t5",
|
MODEL_ARCH.T5: "t5",
|
||||||
MODEL_ARCH.T5ENCODER: "t5encoder",
|
MODEL_ARCH.T5ENCODER: "t5encoder",
|
||||||
MODEL_ARCH.JAIS: "jais",
|
MODEL_ARCH.JAIS: "jais",
|
||||||
MODEL_ARCH.NEMOTRON: "nemotron",
|
MODEL_ARCH.NEMOTRON: "nemotron",
|
||||||
MODEL_ARCH.EXAONE: "exaone",
|
MODEL_ARCH.EXAONE: "exaone",
|
||||||
MODEL_ARCH.GRANITE: "granite",
|
MODEL_ARCH.GRANITE: "granite",
|
||||||
MODEL_ARCH.GRANITE_MOE: "granitemoe",
|
MODEL_ARCH.GRANITE_MOE: "granitemoe",
|
||||||
MODEL_ARCH.CHAMELEON: "chameleon",
|
MODEL_ARCH.CHAMELEON: "chameleon",
|
||||||
MODEL_ARCH.OUTETTS_VOC: "outetts-voc",
|
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
|
||||||
}
|
}
|
||||||
|
|
||||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
@ -1406,7 +1406,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.OUTETTS_VOC: [
|
MODEL_ARCH.WAVTOKENIZER_DEC: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
||||||
MODEL_TENSOR.CONV1D,
|
MODEL_TENSOR.CONV1D,
|
||||||
|
@ -42,7 +42,7 @@ class TensorNameMap:
|
|||||||
"emb_ln", # nomic-bert
|
"emb_ln", # nomic-bert
|
||||||
"transformer.norm", # openelm
|
"transformer.norm", # openelm
|
||||||
"rwkv.blocks.0.pre_ln", # rwkv
|
"rwkv.blocks.0.pre_ln", # rwkv
|
||||||
"backbone.norm", # outetts
|
"backbone.norm", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
# Position embeddings
|
# Position embeddings
|
||||||
@ -61,7 +61,7 @@ class TensorNameMap:
|
|||||||
"lm_head.linear", # phi2
|
"lm_head.linear", # phi2
|
||||||
"output_layer", # chatglm
|
"output_layer", # chatglm
|
||||||
"head", # rwkv
|
"head", # rwkv
|
||||||
"head.out", # outetts
|
"head.out", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
# Output norm
|
# Output norm
|
||||||
@ -82,7 +82,7 @@ class TensorNameMap:
|
|||||||
"transformer.norm", # openelm
|
"transformer.norm", # openelm
|
||||||
"model.norm", # nemotron
|
"model.norm", # nemotron
|
||||||
"rwkv.ln_out", # rwkv
|
"rwkv.ln_out", # rwkv
|
||||||
"backbone.final_layer_norm", # outetts
|
"backbone.final_layer_norm", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rope frequencies
|
# Rope frequencies
|
||||||
@ -705,63 +705,63 @@ class TensorNameMap:
|
|||||||
#############################################################################
|
#############################################################################
|
||||||
|
|
||||||
MODEL_TENSOR.CONV_NEXT_DW: (
|
MODEL_TENSOR.CONV_NEXT_DW: (
|
||||||
"backbone.convnext.{bid}.dwconv", # outetts
|
"backbone.convnext.{bid}.dwconv", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.CONV_NEXT_NORM: (
|
MODEL_TENSOR.CONV_NEXT_NORM: (
|
||||||
"backbone.convnext.{bid}.norm", # outetts
|
"backbone.convnext.{bid}.norm", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.CONV_NEXT_PW1: (
|
MODEL_TENSOR.CONV_NEXT_PW1: (
|
||||||
"backbone.convnext.{bid}.pwconv1", # outetts
|
"backbone.convnext.{bid}.pwconv1", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.CONV_NEXT_PW2: (
|
MODEL_TENSOR.CONV_NEXT_PW2: (
|
||||||
"backbone.convnext.{bid}.pwconv2", # outetts
|
"backbone.convnext.{bid}.pwconv2", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.CONV_NEXT_GAMMA: (
|
MODEL_TENSOR.CONV_NEXT_GAMMA: (
|
||||||
"backbone.convnext.{bid}.gamma", # outetts
|
"backbone.convnext.{bid}.gamma", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_CONV1: (
|
MODEL_TENSOR.POS_NET_CONV1: (
|
||||||
"backbone.pos_net.{bid}.conv1", # outetts
|
"backbone.pos_net.{bid}.conv1", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_CONV2: (
|
MODEL_TENSOR.POS_NET_CONV2: (
|
||||||
"backbone.pos_net.{bid}.conv2", # outetts
|
"backbone.pos_net.{bid}.conv2", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_NORM: (
|
MODEL_TENSOR.POS_NET_NORM: (
|
||||||
"backbone.pos_net.{bid}.norm", # outetts
|
"backbone.pos_net.{bid}.norm", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_NORM1: (
|
MODEL_TENSOR.POS_NET_NORM1: (
|
||||||
"backbone.pos_net.{bid}.norm1", # outetts
|
"backbone.pos_net.{bid}.norm1", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_NORM2: (
|
MODEL_TENSOR.POS_NET_NORM2: (
|
||||||
"backbone.pos_net.{bid}.norm2", # outetts
|
"backbone.pos_net.{bid}.norm2", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_ATTN_NORM: (
|
MODEL_TENSOR.POS_NET_ATTN_NORM: (
|
||||||
"backbone.pos_net.{bid}.norm", # outetts
|
"backbone.pos_net.{bid}.norm", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_ATTN_Q: (
|
MODEL_TENSOR.POS_NET_ATTN_Q: (
|
||||||
"backbone.pos_net.{bid}.q", # outetts
|
"backbone.pos_net.{bid}.q", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_ATTN_K: (
|
MODEL_TENSOR.POS_NET_ATTN_K: (
|
||||||
"backbone.pos_net.{bid}.k", # outetts
|
"backbone.pos_net.{bid}.k", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_ATTN_V: (
|
MODEL_TENSOR.POS_NET_ATTN_V: (
|
||||||
"backbone.pos_net.{bid}.v", # outetts
|
"backbone.pos_net.{bid}.v", # wavtokenizer
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.POS_NET_ATTN_OUT: (
|
MODEL_TENSOR.POS_NET_ATTN_OUT: (
|
||||||
"backbone.pos_net.{bid}.proj_out", # outetts
|
"backbone.pos_net.{bid}.proj_out", # wavtokenizer
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
136
src/llama.cpp
136
src/llama.cpp
@ -197,65 +197,65 @@ enum llm_arch {
|
|||||||
LLM_ARCH_GRANITE,
|
LLM_ARCH_GRANITE,
|
||||||
LLM_ARCH_GRANITE_MOE,
|
LLM_ARCH_GRANITE_MOE,
|
||||||
LLM_ARCH_CHAMELEON,
|
LLM_ARCH_CHAMELEON,
|
||||||
LLM_ARCH_OUTETTS_VOC,
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
||||||
LLM_ARCH_UNKNOWN,
|
LLM_ARCH_UNKNOWN,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
||||||
{ LLM_ARCH_LLAMA, "llama" },
|
{ LLM_ARCH_LLAMA, "llama" },
|
||||||
{ LLM_ARCH_FALCON, "falcon" },
|
{ LLM_ARCH_FALCON, "falcon" },
|
||||||
{ LLM_ARCH_GROK, "grok" },
|
{ LLM_ARCH_GROK, "grok" },
|
||||||
{ LLM_ARCH_GPT2, "gpt2" },
|
{ LLM_ARCH_GPT2, "gpt2" },
|
||||||
{ LLM_ARCH_GPTJ, "gptj" },
|
{ LLM_ARCH_GPTJ, "gptj" },
|
||||||
{ LLM_ARCH_GPTNEOX, "gptneox" },
|
{ LLM_ARCH_GPTNEOX, "gptneox" },
|
||||||
{ LLM_ARCH_MPT, "mpt" },
|
{ LLM_ARCH_MPT, "mpt" },
|
||||||
{ LLM_ARCH_BAICHUAN, "baichuan" },
|
{ LLM_ARCH_BAICHUAN, "baichuan" },
|
||||||
{ LLM_ARCH_STARCODER, "starcoder" },
|
{ LLM_ARCH_STARCODER, "starcoder" },
|
||||||
{ LLM_ARCH_REFACT, "refact" },
|
{ LLM_ARCH_REFACT, "refact" },
|
||||||
{ LLM_ARCH_BERT, "bert" },
|
{ LLM_ARCH_BERT, "bert" },
|
||||||
{ LLM_ARCH_NOMIC_BERT, "nomic-bert" },
|
{ LLM_ARCH_NOMIC_BERT, "nomic-bert" },
|
||||||
{ LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
|
{ LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
|
||||||
{ LLM_ARCH_BLOOM, "bloom" },
|
{ LLM_ARCH_BLOOM, "bloom" },
|
||||||
{ LLM_ARCH_STABLELM, "stablelm" },
|
{ LLM_ARCH_STABLELM, "stablelm" },
|
||||||
{ LLM_ARCH_QWEN, "qwen" },
|
{ LLM_ARCH_QWEN, "qwen" },
|
||||||
{ LLM_ARCH_QWEN2, "qwen2" },
|
{ LLM_ARCH_QWEN2, "qwen2" },
|
||||||
{ LLM_ARCH_QWEN2MOE, "qwen2moe" },
|
{ LLM_ARCH_QWEN2MOE, "qwen2moe" },
|
||||||
{ LLM_ARCH_QWEN2VL, "qwen2vl" },
|
{ LLM_ARCH_QWEN2VL, "qwen2vl" },
|
||||||
{ LLM_ARCH_PHI2, "phi2" },
|
{ LLM_ARCH_PHI2, "phi2" },
|
||||||
{ LLM_ARCH_PHI3, "phi3" },
|
{ LLM_ARCH_PHI3, "phi3" },
|
||||||
{ LLM_ARCH_PLAMO, "plamo" },
|
{ LLM_ARCH_PLAMO, "plamo" },
|
||||||
{ LLM_ARCH_CODESHELL, "codeshell" },
|
{ LLM_ARCH_CODESHELL, "codeshell" },
|
||||||
{ LLM_ARCH_ORION, "orion" },
|
{ LLM_ARCH_ORION, "orion" },
|
||||||
{ LLM_ARCH_INTERNLM2, "internlm2" },
|
{ LLM_ARCH_INTERNLM2, "internlm2" },
|
||||||
{ LLM_ARCH_MINICPM, "minicpm" },
|
{ LLM_ARCH_MINICPM, "minicpm" },
|
||||||
{ LLM_ARCH_MINICPM3, "minicpm3" },
|
{ LLM_ARCH_MINICPM3, "minicpm3" },
|
||||||
{ LLM_ARCH_GEMMA, "gemma" },
|
{ LLM_ARCH_GEMMA, "gemma" },
|
||||||
{ LLM_ARCH_GEMMA2, "gemma2" },
|
{ LLM_ARCH_GEMMA2, "gemma2" },
|
||||||
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
||||||
{ LLM_ARCH_MAMBA, "mamba" },
|
{ LLM_ARCH_MAMBA, "mamba" },
|
||||||
{ LLM_ARCH_XVERSE, "xverse" },
|
{ LLM_ARCH_XVERSE, "xverse" },
|
||||||
{ LLM_ARCH_COMMAND_R, "command-r" },
|
{ LLM_ARCH_COMMAND_R, "command-r" },
|
||||||
{ LLM_ARCH_DBRX, "dbrx" },
|
{ LLM_ARCH_DBRX, "dbrx" },
|
||||||
{ LLM_ARCH_OLMO, "olmo" },
|
{ LLM_ARCH_OLMO, "olmo" },
|
||||||
{ LLM_ARCH_OLMO2, "olmo2" },
|
{ LLM_ARCH_OLMO2, "olmo2" },
|
||||||
{ LLM_ARCH_OLMOE, "olmoe" },
|
{ LLM_ARCH_OLMOE, "olmoe" },
|
||||||
{ LLM_ARCH_OPENELM, "openelm" },
|
{ LLM_ARCH_OPENELM, "openelm" },
|
||||||
{ LLM_ARCH_ARCTIC, "arctic" },
|
{ LLM_ARCH_ARCTIC, "arctic" },
|
||||||
{ LLM_ARCH_DEEPSEEK, "deepseek" },
|
{ LLM_ARCH_DEEPSEEK, "deepseek" },
|
||||||
{ LLM_ARCH_DEEPSEEK2, "deepseek2" },
|
{ LLM_ARCH_DEEPSEEK2, "deepseek2" },
|
||||||
{ LLM_ARCH_CHATGLM, "chatglm" },
|
{ LLM_ARCH_CHATGLM, "chatglm" },
|
||||||
{ LLM_ARCH_BITNET, "bitnet" },
|
{ LLM_ARCH_BITNET, "bitnet" },
|
||||||
{ LLM_ARCH_T5, "t5" },
|
{ LLM_ARCH_T5, "t5" },
|
||||||
{ LLM_ARCH_T5ENCODER, "t5encoder" },
|
{ LLM_ARCH_T5ENCODER, "t5encoder" },
|
||||||
{ LLM_ARCH_JAIS, "jais" },
|
{ LLM_ARCH_JAIS, "jais" },
|
||||||
{ LLM_ARCH_NEMOTRON, "nemotron" },
|
{ LLM_ARCH_NEMOTRON, "nemotron" },
|
||||||
{ LLM_ARCH_EXAONE, "exaone" },
|
{ LLM_ARCH_EXAONE, "exaone" },
|
||||||
{ LLM_ARCH_RWKV6, "rwkv6" },
|
{ LLM_ARCH_RWKV6, "rwkv6" },
|
||||||
{ LLM_ARCH_GRANITE, "granite" },
|
{ LLM_ARCH_GRANITE, "granite" },
|
||||||
{ LLM_ARCH_GRANITE_MOE, "granitemoe" },
|
{ LLM_ARCH_GRANITE_MOE, "granitemoe" },
|
||||||
{ LLM_ARCH_CHAMELEON, "chameleon" },
|
{ LLM_ARCH_CHAMELEON, "chameleon" },
|
||||||
{ LLM_ARCH_OUTETTS_VOC, "outetts-voc" },
|
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
||||||
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
||||||
};
|
};
|
||||||
|
|
||||||
enum llm_kv {
|
enum llm_kv {
|
||||||
@ -1612,7 +1612,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
LLM_ARCH_OUTETTS_VOC,
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
||||||
{
|
{
|
||||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||||
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
||||||
@ -3063,7 +3063,7 @@ struct llama_model {
|
|||||||
struct ggml_tensor * cls_out = nullptr;
|
struct ggml_tensor * cls_out = nullptr;
|
||||||
struct ggml_tensor * cls_out_b = nullptr;
|
struct ggml_tensor * cls_out_b = nullptr;
|
||||||
|
|
||||||
// outetts vocoder
|
// wavtokenizer decoder
|
||||||
// TODO: dedup
|
// TODO: dedup
|
||||||
struct ggml_tensor * conv_1d = nullptr;
|
struct ggml_tensor * conv_1d = nullptr;
|
||||||
struct ggml_tensor * conv_1d_b = nullptr;
|
struct ggml_tensor * conv_1d_b = nullptr;
|
||||||
@ -6443,7 +6443,7 @@ static void llm_load_hparams(
|
|||||||
default: model.type = e_model::MODEL_UNKNOWN;
|
default: model.type = e_model::MODEL_UNKNOWN;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_OUTETTS_VOC:
|
case LLM_ARCH_WAVTOKENIZER_DEC:
|
||||||
{
|
{
|
||||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
||||||
} break;
|
} break;
|
||||||
@ -9545,7 +9545,7 @@ static bool llm_load_tensors(
|
|||||||
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
|
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_OUTETTS_VOC:
|
case LLM_ARCH_WAVTOKENIZER_DEC:
|
||||||
{
|
{
|
||||||
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {512, n_vocab}, 0);
|
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {512, n_vocab}, 0);
|
||||||
|
|
||||||
@ -16142,7 +16142,7 @@ struct llm_build_context {
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph * build_t5_encoder() {
|
struct ggml_cgraph * build_t5_enc() {
|
||||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
||||||
|
|
||||||
// mutable variable, needed during the last layer of the computation to skip unused tokens
|
// mutable variable, needed during the last layer of the computation to skip unused tokens
|
||||||
@ -16274,7 +16274,7 @@ struct llm_build_context {
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph * build_t5_decoder() {
|
struct ggml_cgraph * build_t5_dec() {
|
||||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
||||||
|
|
||||||
// mutable variable, needed during the last layer of the computation to skip unused tokens
|
// mutable variable, needed during the last layer of the computation to skip unused tokens
|
||||||
@ -17224,7 +17224,7 @@ struct llm_build_context {
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph * build_outetts_voc() {
|
struct ggml_cgraph * build_wavtokenizer_dec() {
|
||||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
@ -17692,14 +17692,14 @@ static struct ggml_cgraph * llama_build_graph(
|
|||||||
case LLM_ARCH_T5:
|
case LLM_ARCH_T5:
|
||||||
{
|
{
|
||||||
if (lctx.is_encoding) {
|
if (lctx.is_encoding) {
|
||||||
result = llm.build_t5_encoder();
|
result = llm.build_t5_enc();
|
||||||
} else {
|
} else {
|
||||||
result = llm.build_t5_decoder();
|
result = llm.build_t5_dec();
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_T5ENCODER:
|
case LLM_ARCH_T5ENCODER:
|
||||||
{
|
{
|
||||||
result = llm.build_t5_encoder();
|
result = llm.build_t5_enc();
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_JAIS:
|
case LLM_ARCH_JAIS:
|
||||||
{
|
{
|
||||||
@ -17721,9 +17721,9 @@ static struct ggml_cgraph * llama_build_graph(
|
|||||||
{
|
{
|
||||||
result = llm.build_chameleon();
|
result = llm.build_chameleon();
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_OUTETTS_VOC:
|
case LLM_ARCH_WAVTOKENIZER_DEC:
|
||||||
{
|
{
|
||||||
result = llm.build_outetts_voc();
|
result = llm.build_wavtokenizer_dec();
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
GGML_ABORT("fatal error");
|
GGML_ABORT("fatal error");
|
||||||
@ -20904,7 +20904,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
|||||||
case LLM_ARCH_T5ENCODER:
|
case LLM_ARCH_T5ENCODER:
|
||||||
case LLM_ARCH_JAIS:
|
case LLM_ARCH_JAIS:
|
||||||
case LLM_ARCH_RWKV6:
|
case LLM_ARCH_RWKV6:
|
||||||
case LLM_ARCH_OUTETTS_VOC:
|
case LLM_ARCH_WAVTOKENIZER_DEC:
|
||||||
return LLAMA_ROPE_TYPE_NONE;
|
return LLAMA_ROPE_TYPE_NONE;
|
||||||
|
|
||||||
// use what we call a normal RoPE, operating on pairs of consecutive head values
|
// use what we call a normal RoPE, operating on pairs of consecutive head values
|
||||||
|
Loading…
Reference in New Issue
Block a user