From 911b3900dded9a1cfe0f0e41b82c7a29baf3a217 Mon Sep 17 00:00:00 2001
From: Johan
Date: Wed, 8 May 2024 14:27:58 +0200
Subject: [PATCH] server : add_special option for tokenize endpoint (#7059)

---
 examples/server/README.md                     |  2 +-
 examples/server/server.cpp                    |  3 +-
 examples/server/tests/features/server.feature | 14 +++++++-
 examples/server/tests/features/steps/steps.py | 36 ++++++++++++++++---
 4 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index a7c3f0b5f..650317991 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -331,7 +331,7 @@ Notice that each `probs` is an array of length `n_probs`.
 
     `content`: Set the text to tokenize.
 
-    Note that a special `BOS` token is never inserted.
+    `add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
 
 - **POST** `/detokenize`: Convert tokens to text.
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 85ae1ad96..06c0be567 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3647,7 +3647,8 @@ int main(int argc, char ** argv) {
 
         std::vector<llama_token> tokens;
         if (body.count("content") != 0) {
-            tokens = ctx_server.tokenize(body["content"], false);
+            const bool add_special = json_value(body, "add_special", false);
+            tokens = ctx_server.tokenize(body["content"], add_special);
         }
         const json data = format_tokenizer_response(tokens);
         return res.set_content(data.dump(), "application/json; charset=utf-8");
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 646a4e49d..d21c09135 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -7,6 +7,7 @@ Feature: llama.cpp server
     And   a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
     And   a model file test-model.gguf
     And   a model alias tinyllama-2
+    And   BOS token is 1
     And   42 as server seed
     # KV Cache corresponds to the total amount of tokens
     # that can be stored across all independent sequences: #4130
@@ -91,7 +92,18 @@ Feature: llama.cpp server
      """
      What is the capital of France ?
      """
-    Then tokens can be detokenize
+    Then tokens can be detokenized
+    And  tokens do not begin with BOS
+
+  Scenario: Tokenize w/ BOS
+    Given adding special tokens
+    When tokenizing:
+      """
+      What is the capital of Germany?
+      """
+    Then tokens begin with BOS
+    Given first token is removed
+    Then tokens can be detokenized
 
   Scenario: Models available
     Given available models
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index b8dbef21d..0882a5d36 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -376,6 +376,11 @@ def step_seed(context, seed):
     context.seed.append(seed)
 
 
+@step('BOS token is {bos:d}')
+def step_bos_token(context, bos):
+    context.bos = bos
+
+
 @step('a prefix prompt')
 def step_prompt_prefix(context):
     context.prompt_prefix = context_text(context)
@@ -656,21 +661,29 @@ async def all_embeddings_are_generated(context):
     assert_embeddings(context.tasks_result.pop().pop())
 
 
+@step('adding special tokens')
+def step_tokenize_set_add_special(context):
+    context.tokenize_add_special = True
+
+
 @step('tokenizing')
 @async_run_until_complete
 async def step_tokenize(context):
     context.tokenized_text = context_text(context)
     async with aiohttp.ClientSession() as session:
+        tokenize_args = {
+            "content": context.tokenized_text,
+        }
+        if getattr(context, 'tokenize_add_special', None) is not None:
+            tokenize_args['add_special'] = context.tokenize_add_special
         async with session.post(f'{context.base_url}/tokenize',
-                                json={
-                                    "content": context.tokenized_text,
-                                }) as response:
+                                json=tokenize_args) as response:
             assert response.status == 200
             tokenize_json = await response.json()
             context.tokens = tokenize_json['tokens']
 
 
-@step('tokens can be detokenize')
+@step('tokens can be detokenized')
 @async_run_until_complete
 async def step_detokenize(context):
     assert len(context.tokens) > 0
@@ -685,6 +698,21 @@ async def step_detokenize(context):
     assert context.tokenized_text == detokenize_json['content'].strip()
 
 
+@step('tokens begin with BOS')
+def step_tokens_begin_with_bos(context):
+    assert context.tokens[0] == context.bos
+
+
+@step('tokens do not begin with BOS')
+def step_tokens_do_not_begin_with_bos(context):
+    assert context.tokens[0] != context.bos
+
+
+@step('first token is removed')
+def step_remove_first_token(context):
+    context.tokens = context.tokens[1:]
+
+
 @step('an OPTIONS request is sent from {origin}')
 @async_run_until_complete
 async def step_options_request(context, origin):
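A minimal usage sketch (not part of the patch) of the new `add_special` option, assuming a server is already listening on its default `http://localhost:8080` and that the third-party `requests` package is installed; the BOS id `1` in the comments matches the tinyllama test model pinned in the feature file above:

```python
# Sketch: exercise the new add_special option of the /tokenize endpoint.
import requests

base_url = "http://localhost:8080"  # assumed default server address
content = "What is the capital of France ?"

# Default behaviour (add_special omitted or false): no special tokens.
plain = requests.post(f"{base_url}/tokenize",
                      json={"content": content}).json()

# With add_special=true the tokenizer inserts special tokens such as BOS.
special = requests.post(f"{base_url}/tokenize",
                        json={"content": content, "add_special": True}).json()

print(plain["tokens"])    # no leading BOS
print(special["tokens"])  # begins with the BOS id (1 for the test model)
```

Defaulting `add_special` to `false` keeps the endpoint backward compatible: callers that never send the field get exactly the old "BOS is never inserted" behaviour.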