mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-09-23 05:26:19 +00:00
Fix trailing ws
This commit is contained in:
parent
0c5baa1cd1
commit
0d198bbf98
@ -533,7 +533,7 @@ With input 'á' (utf8 hex: C3 A1) on tinyllama/stories260k
|
||||
```json
|
||||
{
|
||||
"tokens": [
|
||||
{"id": 198, "piece": [195]}, // hex C3
|
||||
{"id": 198, "piece": [195]}, // hex C3
|
||||
{"id": 164, "piece": [161]} // hex A1
|
||||
]
|
||||
}
|
||||
|
@ -104,15 +104,15 @@ Feature: llama.cpp server
|
||||
Then tokens begin with BOS
|
||||
Given first token is removed
|
||||
Then tokens can be detokenized
|
||||
|
||||
|
||||
Scenario: Tokenize with pieces
|
||||
When tokenizing with pieces:
|
||||
"""
|
||||
What is the capital of Germany?
|
||||
What is the capital of Germany?
|
||||
媽
|
||||
"""
|
||||
Then tokens are given with pieces
|
||||
|
||||
|
||||
Scenario: Models available
|
||||
Given available models
|
||||
Then 1 models are supported
|
||||
|
@ -603,7 +603,7 @@ static bool is_valid_utf8(const std::string & str) {
|
||||
bytes += 3;
|
||||
} else if ((*bytes & 0xF8) == 0xF0) {
|
||||
// 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
|
||||
if (end - bytes < 4 || (bytes[1] & 0xC0) != 0x80 ||
|
||||
if (end - bytes < 4 || (bytes[1] & 0xC0) != 0x80 ||
|
||||
(bytes[2] & 0xC0) != 0x80 || (bytes[3] & 0xC0) != 0x80)
|
||||
return false;
|
||||
bytes += 4;
|
||||
|
Loading…
Reference in New Issue
Block a user