mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-11-11 21:39:52 +00:00)

server : added with_pieces functionality to /tokenize endpoint

commit a2d4d1913c, parent 2f3c1466ff
@@ -500,9 +500,34 @@ Notice that each `probs` is an array of length `n_probs`.

 *Options:*

-`content`: Set the text to tokenize.
+`content`: (Required) The text to tokenize.

-`add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
+`add_special`: (Optional) Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
+
+`with_pieces`: (Optional) Boolean indicating whether to return token pieces along with IDs. Default: `false`
+
+**Response:**
+
+Returns a JSON object with a `tokens` field containing the tokenization result. The `tokens` array contains either just token IDs or objects with `id` and `piece` fields, depending on the `with_pieces` parameter.
+
+If `with_pieces` is `false`:
+```json
+{
+  "tokens": [123, 456, 789]
+}
+```
+
+If `with_pieces` is `true`:
+```json
+{
+  "tokens": [
+    {"id": 123, "piece": "Hello"},
+    {"id": 456, "piece": " world"},
+    {"id": 789, "piece": "!"}
+  ]
+}
+```

 ### POST `/detokenize`: Convert tokens to text
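As a quick sanity check of the documented behaviour, here is a minimal client sketch; it assumes a llama.cpp server already listening on `http://localhost:8080` (the default) with a model loaded, and the token IDs shown in the comments are illustrative only:

```python
import requests  # third-party HTTP client, used here for brevity

BASE_URL = "http://localhost:8080"  # assumed server address

# Plain token IDs (with_pieces defaults to false)
resp = requests.post(f"{BASE_URL}/tokenize", json={"content": "Hello world!"})
resp.raise_for_status()
print(resp.json())  # e.g. {"tokens": [123, 456, 789]}

# Token IDs paired with their text pieces
resp = requests.post(
    f"{BASE_URL}/tokenize",
    json={"content": "Hello world!", "with_pieces": True},
)
resp.raise_for_status()
for tok in resp.json()["tokens"]:
    print(tok["id"], repr(tok["piece"]))  # e.g. 123 'Hello'
```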
@@ -3189,12 +3189,26 @@ int main(int argc, char ** argv) {

     const auto handle_tokenize = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
         const json body = json::parse(req.body);

-        std::vector<llama_token> tokens;
+        json tokens_response = json::array();
         if (body.count("content") != 0) {
             const bool add_special = json_value(body, "add_special", false);
-            tokens = ctx_server.tokenize(body.at("content"), add_special);
+            const bool with_pieces = json_value(body, "with_pieces", false);
+            std::vector<llama_token> tokens = ctx_server.tokenize(body.at("content"), add_special);
+
+            if (with_pieces) {
+                for (const auto& token : tokens) {
+                    std::string piece = llama_token_to_piece(ctx_server.ctx, token);
+                    tokens_response.push_back({
+                        {"id", token},
+                        {"piece", piece}
+                    });
+                }
+            } else {
+                tokens_response = tokens;
+            }
         }

-        const json data = format_tokenizer_response(tokens);
+        const json data = format_tokenizer_response(tokens_response);
         return res.set_content(data.dump(), MIMETYPE_JSON);
     };
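For readers skimming the C++ above, the following Python sketch mirrors the same response shaping; `toy_tokenize` is a made-up stand-in for `ctx_server.tokenize` plus `llama_token_to_piece`, not the server's actual tokenizer:

```python
def toy_tokenize(content: str) -> list[tuple[int, str]]:
    # Toy stand-in: real IDs and pieces come from the C++ server;
    # these are fabricated for illustration.
    return [(hash(w) % 1000, w) for w in content.split()]

def build_tokenize_response(body: dict) -> dict:
    tokens_response = []
    if "content" in body:
        with_pieces = body.get("with_pieces", False)
        tokens = toy_tokenize(body["content"])
        if with_pieces:
            # One {id, piece} object per token
            tokens_response = [{"id": i, "piece": p} for i, p in tokens]
        else:
            # Just the raw IDs
            tokens_response = [i for i, _ in tokens]
    return {"tokens": tokens_response}

print(build_tokenize_response({"content": "Hello world", "with_pieces": True}))
```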
@@ -685,6 +685,32 @@ def step_tokenize_set_add_special(context):
     context.tokenize_add_special = True


+@step("tokenizing with pieces")
+@async_run_until_complete
+async def step_tokenize_with_pieces(context):
+    context.tokenized_text = context_text(context)
+    async with aiohttp.ClientSession() as session:
+        tokenize_args = {"content": context.tokenized_text, "with_pieces": True}
+        if getattr(context, "tokenize_add_special", None) is not None:
+            tokenize_args["add_special"] = context.tokenize_add_special
+
+        async with session.post(
+            f"{context.base_url}/tokenize", json=tokenize_args
+        ) as response:
+            assert response.status == 200
+            tokenize_json = await response.json()
+            context.tokens_with_pieces = tokenize_json["tokens"]
+
+
+@step("tokens with pieces are complete")
+@async_run_until_complete
+async def step_check_tokens_with_pieces(context):
+    # Verify that the response contains both token IDs and pieces
+    assert all(
+        "id" in token and "piece" in token for token in context.tokens_with_pieces
+    )
+
+
 @step('tokenizing')
 @async_run_until_complete
 async def step_tokenize(context):
@@ -583,7 +583,7 @@ static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
     return res;
 }

-static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
+static json format_tokenizer_response(const json & tokens) {
     return json {
         {"tokens", tokens}
     };
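This widened signature is what lets a single formatter serve both response shapes: it now wraps whatever JSON array it is handed, whether plain IDs or `{id, piece}` objects. A Python analogue of the idea, not the server's code:

```python
def format_tokenizer_response(tokens) -> dict:
    # Wraps any array, mirroring the widened `const json &`
    # parameter in utils.hpp above.
    return {"tokens": tokens}

print(format_tokenizer_response([123, 456, 789]))
print(format_tokenizer_response([{"id": 123, "piece": "Hello"}]))
```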