mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-28 12:24:35 +00:00
cmpnct_gpt2bpe.hpp : fixes
This commit is contained in:
parent
b3a7a2b486
commit
dadf098b5a
@ -445,17 +445,13 @@ struct gpt2bpe_vocab {
|
|||||||
std::vector<std::pair<std::string, std::string>> bpe_merges;
|
std::vector<std::pair<std::string, std::string>> bpe_merges;
|
||||||
std::map<std::string, int> special_tokens;
|
std::map<std::string, int> special_tokens;
|
||||||
|
|
||||||
id special_bos_id = 0;
|
id special_bos_id = -1;
|
||||||
id special_eos_id = 0;
|
id special_eos_id = -1;
|
||||||
id special_unk_id = 0;
|
id special_unk_id = -1;
|
||||||
id special_sep_id = 0;
|
id special_sep_id = -1;
|
||||||
id special_pad_id = 0;
|
id special_pad_id = -1;
|
||||||
|
|
||||||
bool special_have_bos = false;
|
id linefeed_id = -1;
|
||||||
bool special_have_eos = false;
|
|
||||||
bool special_have_unk = false;
|
|
||||||
bool special_have_sep = false;
|
|
||||||
bool special_have_pad = false;
|
|
||||||
|
|
||||||
std::unordered_map<token, id> token_to_id;
|
std::unordered_map<token, id> token_to_id;
|
||||||
std::unordered_map<id, token> id_to_token;
|
std::unordered_map<id, token> id_to_token;
|
||||||
@ -1002,7 +998,7 @@ static std::vector<gpt2bpe_vocab::id> gpt2bpe_tokenize(const gpt2bpe_vocab & voc
|
|||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bos && vocab.special_have_bos) {
|
if (bos && vocab.special_bos_id != -1) {
|
||||||
output.push_back(vocab.special_bos_id);
|
output.push_back(vocab.special_bos_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user