From a2d4d1913cd1f862c5dd8d3d7a9f90b3aed9079e Mon Sep 17 00:00:00 2001 From: Mathijs Henquet Date: Tue, 20 Aug 2024 23:28:06 +0200 Subject: [PATCH] server : added with_pieces functionality to /tokenize endpoint --- examples/server/README.md | 29 +++++++++++++++++-- examples/server/server.cpp | 20 +++++++++++-- examples/server/tests/features/steps/steps.py | 26 +++++++++++++++++ examples/server/utils.hpp | 2 +- 4 files changed, 71 insertions(+), 6 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 930ae15f6..dba47d94d 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -500,9 +500,34 @@ Notice that each `probs` is an array of length `n_probs`. *Options:* - `content`: Set the text to tokenize. + `content`: (Required) The text to tokenize. + + `add_special`: (Optional) Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false` - `add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false` + `with_pieces`: (Optional) Boolean indicating whether to return token pieces along with IDs. Default: `false` + +**Response:** + +Returns a JSON object with a `tokens` field containing the tokenization result. The `tokens` array contains either just token IDs or objects with `id` and `piece` fields, depending on the `with_pieces` parameter. 
+ + +If `with_pieces` is `false`: +```json +{ + "tokens": [123, 456, 789] +} +``` + +If `with_pieces` is `true`: +```json +{ + "tokens": [ + {"id": 123, "piece": "Hello"}, + {"id": 456, "piece": " world"}, + {"id": 789, "piece": "!"} + ] +} +``` ### POST `/detokenize`: Convert tokens to text diff --git a/examples/server/server.cpp b/examples/server/server.cpp index ce711eadd..543092409 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -3189,12 +3189,26 @@ int main(int argc, char ** argv) { const auto handle_tokenize = [&ctx_server](const httplib::Request & req, httplib::Response & res) { const json body = json::parse(req.body); - std::vector<llama_token> tokens; + json tokens_response = json::array(); if (body.count("content") != 0) { const bool add_special = json_value(body, "add_special", false); - tokens = ctx_server.tokenize(body.at("content"), add_special); + const bool with_pieces = json_value(body, "with_pieces", false); + std::vector<llama_token> tokens = ctx_server.tokenize(body.at("content"), add_special); + + if (with_pieces) { + for (const auto& token : tokens) { + std::string piece = llama_token_to_piece(ctx_server.ctx, token); + tokens_response.push_back({ + {"id", token}, + {"piece", piece} + }); + } + } else { + tokens_response = tokens; + } } - const json data = format_tokenizer_response(tokens); + + const json data = format_tokenizer_response(tokens_response); return res.set_content(data.dump(), MIMETYPE_JSON); }; diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 1ba7b60b6..fec6bcae5 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -685,6 +685,32 @@ def step_tokenize_set_add_special(context): context.tokenize_add_special = True +@step("tokenizing with pieces") +@async_run_until_complete +async def step_tokenize_with_pieces(context): + context.tokenized_text = context_text(context) + async with aiohttp.ClientSession() as 
session: + tokenize_args = {"content": context.tokenized_text, "with_pieces": True} + if getattr(context, "tokenize_add_special", None) is not None: + tokenize_args["add_special"] = context.tokenize_add_special + + async with session.post( + f"{context.base_url}/tokenize", json=tokenize_args + ) as response: + assert response.status == 200 + tokenize_json = await response.json() + context.tokens_with_pieces = tokenize_json["tokens"] + + +@step("tokens with pieces are complete") +@async_run_until_complete +async def step_tokenize_with_pieces(context): + # Verify that the response contains both token IDs and pieces + assert all( + "id" in token and "piece" in token for token in context.tokens_with_pieces + ) + + @step('tokenizing') @async_run_until_complete async def step_tokenize(context): diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index e6a1f0697..42635acca 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -583,7 +583,7 @@ static json format_embeddings_response_oaicompat(const json & request, const jso return res; } -static json format_tokenizer_response(const std::vector<llama_token> & tokens) { +static json format_tokenizer_response(const json & tokens) { return json { {"tokens", tokens} };