llama : use Unicode Escape Sequence to replace encoded characters (#2814)

The use of special characters within source files can break compilation on some computers with different region and language settings. Using Unicode escape sequences should allow the code to be compiled on all setups without needing to change your computer's settings or switch regions.
This commit is contained in:
Tim Miller 2023-08-27 03:27:07 +09:00 committed by GitHub
parent 61d1a2895e
commit c7d92e6dfe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -955,10 +955,10 @@ struct llama_vocab {
id linefeed_id = 13; id linefeed_id = 13;
int find_bpe_rank(std::string token_left, std::string token_right) const { int find_bpe_rank(std::string token_left, std::string token_right) const {
replace_all(token_left, " ", "Ġ"); replace_all(token_left, " ", "\u0120");
replace_all(token_left, "\n", "Ċ"); replace_all(token_left, "\n", "\u010A");
replace_all(token_right, " ", "Ġ"); replace_all(token_right, " ", "\u0120");
replace_all(token_right, "\n", "Ċ"); replace_all(token_right, "\n", "\u010A");
auto it = bpe_ranks.find(std::make_pair(token_left, token_right)); auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
if (it == bpe_ranks.end()) { if (it == bpe_ranks.end()) {