constants.py : add token types

This commit is contained in:
klosax 2023-08-14 22:07:53 +02:00 committed by GitHub
parent 7ec125b1dc
commit 5d518d421f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -36,19 +36,15 @@ KEY_ROPE_DIMENSION_COUNT = "{llm}.rope.dimension_count"
# Metadata key names. Each constant is defined exactly once; earlier revisions
# of this section re-assigned the same names several times, so only the last
# value of each name ever took effect. Those effective values are kept here.

# Contains a "{llm}" placeholder -- presumably filled in with the model
# architecture name via str.format(); confirm against the callers.
KEY_ROPE_SCALE = "{llm}.rope.scale"

# tokenization
KEY_TOKENIZER_MODEL = "tokenizer.ggml.model"
KEY_TOKENIZER_LIST = "tokenizer.ggml.tokens"
KEY_TOKENIZER_TOKEN_TYPE = "tokenizer.ggml.token_type"
KEY_TOKENIZER_SCORES = "tokenizer.ggml.scores"
KEY_TOKENIZER_MERGES = "tokenizer.ggml.merges"
KEY_TOKENIZER_BOS_ID = "tokenizer.ggml.bos_token_id"
KEY_TOKENIZER_EOS_ID = "tokenizer.ggml.eos_token_id"
KEY_TOKENIZER_UNK_ID = "tokenizer.ggml.unknown_token_id"
# NOTE(review): "seperator" (sic) is the spelling that was in effect (it was
# the last assignment to this name). It is a runtime key string, so it is left
# untouched; verify against the file-format spec before "correcting" it.
KEY_TOKENIZER_SEP_ID = "tokenizer.ggml.seperator_token_id"
KEY_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"
KEY_TOKENIZER_HF_JSON = "tokenizer.huggingface.json"
KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world"