mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 19:21:46 +00:00
server : add_special option for tokenize endpoint (#7059)
This commit is contained in:
parent
ad211edef5
commit
911b3900dd
@ -331,7 +331,7 @@ Notice that each `probs` is an array of length `n_probs`.
|
||||
|
||||
`content`: Set the text to tokenize.
|
||||
|
||||
Note that a special `BOS` token is never inserted.
|
||||
`add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
|
||||
|
||||
- **POST** `/detokenize`: Convert tokens to text.
|
||||
|
||||
|
@ -3647,7 +3647,8 @@ int main(int argc, char ** argv) {
|
||||
|
||||
std::vector<llama_token> tokens;
|
||||
if (body.count("content") != 0) {
|
||||
tokens = ctx_server.tokenize(body["content"], false);
|
||||
const bool add_special = json_value(body, "add_special", false);
|
||||
tokens = ctx_server.tokenize(body["content"], add_special);
|
||||
}
|
||||
const json data = format_tokenizer_response(tokens);
|
||||
return res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
|
@ -7,6 +7,7 @@ Feature: llama.cpp server
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
And a model file test-model.gguf
|
||||
And a model alias tinyllama-2
|
||||
And BOS token is 1
|
||||
And 42 as server seed
|
||||
# KV Cache corresponds to the total amount of tokens
|
||||
# that can be stored across all independent sequences: #4130
|
||||
@ -91,7 +92,18 @@ Feature: llama.cpp server
|
||||
"""
|
||||
What is the capital of France ?
|
||||
"""
|
||||
Then tokens can be detokenize
|
||||
Then tokens can be detokenized
|
||||
And tokens do not begin with BOS
|
||||
|
||||
Scenario: Tokenize w/ BOS
|
||||
Given adding special tokens
|
||||
When tokenizing:
|
||||
"""
|
||||
What is the capital of Germany?
|
||||
"""
|
||||
Then tokens begin with BOS
|
||||
Given first token is removed
|
||||
Then tokens can be detokenized
|
||||
|
||||
Scenario: Models available
|
||||
Given available models
|
||||
|
@ -376,6 +376,11 @@ def step_seed(context, seed):
|
||||
context.seed.append(seed)
|
||||
|
||||
|
||||
@step('BOS token is {bos:d}')
|
||||
def step_bos_token(context, bos):
|
||||
context.bos = bos
|
||||
|
||||
|
||||
@step('a prefix prompt')
|
||||
def step_prompt_prefix(context):
|
||||
context.prompt_prefix = context_text(context)
|
||||
@ -656,21 +661,29 @@ async def all_embeddings_are_generated(context):
|
||||
assert_embeddings(context.tasks_result.pop().pop())
|
||||
|
||||
|
||||
@step('adding special tokens')
|
||||
def step_tokenize_set_add_special(context):
|
||||
context.tokenize_add_special = True
|
||||
|
||||
|
||||
@step('tokenizing')
|
||||
@async_run_until_complete
|
||||
async def step_tokenize(context):
|
||||
context.tokenized_text = context_text(context)
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tokenize_args = {
|
||||
"content": context.tokenized_text,
|
||||
}
|
||||
if getattr(context, 'tokenize_add_special', None) is not None:
|
||||
tokenize_args['add_special'] = context.tokenize_add_special
|
||||
async with session.post(f'{context.base_url}/tokenize',
|
||||
json={
|
||||
"content": context.tokenized_text,
|
||||
}) as response:
|
||||
json=tokenize_args) as response:
|
||||
assert response.status == 200
|
||||
tokenize_json = await response.json()
|
||||
context.tokens = tokenize_json['tokens']
|
||||
|
||||
|
||||
@step('tokens can be detokenize')
|
||||
@step('tokens can be detokenized')
|
||||
@async_run_until_complete
|
||||
async def step_detokenize(context):
|
||||
assert len(context.tokens) > 0
|
||||
@ -685,6 +698,21 @@ async def step_detokenize(context):
|
||||
assert context.tokenized_text == detokenize_json['content'].strip()
|
||||
|
||||
|
||||
@step('tokens begin with BOS')
|
||||
def step_strings_for_tokenization(context):
|
||||
assert context.tokens[0] == context.bos
|
||||
|
||||
|
||||
@step('tokens do not begin with BOS')
|
||||
def step_strings_for_tokenization(context):
|
||||
assert context.tokens[0] != context.bos
|
||||
|
||||
|
||||
@step('first token is removed')
|
||||
def step_strings_for_tokenization(context):
|
||||
context.tokens = context.tokens[1:]
|
||||
|
||||
|
||||
@step('an OPTIONS request is sent from {origin}')
|
||||
@async_run_until_complete
|
||||
async def step_options_request(context, origin):
|
||||
|
Loading…
Reference in New Issue
Block a user