mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-11-11 21:39:52 +00:00)

server : added with_pieces functionality to /tokenize endpoint

commit a2d4d1913c, parent 2f3c1466ff
@@ -500,9 +500,34 @@ Notice that each `probs` is an array of length `n_probs`.

 *Options:*

-`content`: Set the text to tokenize.
+`content`: (Required) The text to tokenize.

-`add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
+`add_special`: (Optional) Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
+
+`with_pieces`: (Optional) Boolean indicating whether to return token pieces along with IDs. Default: `false`
+
+**Response:**
+
+Returns a JSON object with a `tokens` field containing the tokenization result. The `tokens` array contains either just token IDs or objects with `id` and `piece` fields, depending on the `with_pieces` parameter.
+
+If `with_pieces` is `false`:
+```json
+{
+  "tokens": [123, 456, 789]
+}
+```
+
+If `with_pieces` is `true`:
+```json
+{
+  "tokens": [
+    {"id": 123, "piece": "Hello"},
+    {"id": 456, "piece": " world"},
+    {"id": 789, "piece": "!"}
+  ]
+}
+```

 ### POST `/detokenize`: Convert tokens to text
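As a quick sanity check of the documented behaviour, here is a minimal client sketch; it assumes a llama.cpp server already listening on `http://localhost:8080` (the default) with a model loaded, and the token IDs shown in the comments are illustrative only:

```python
import requests  # third-party HTTP client, used here for brevity

BASE_URL = "http://localhost:8080"  # assumed server address

# Plain token IDs (with_pieces defaults to false)
resp = requests.post(f"{BASE_URL}/tokenize", json={"content": "Hello world!"})
resp.raise_for_status()
print(resp.json())  # e.g. {"tokens": [123, 456, 789]}

# Token IDs paired with their text pieces
resp = requests.post(
    f"{BASE_URL}/tokenize",
    json={"content": "Hello world!", "with_pieces": True},
)
resp.raise_for_status()
for tok in resp.json()["tokens"]:
    print(tok["id"], repr(tok["piece"]))  # e.g. 123 'Hello'
```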
@@ -3189,12 +3189,26 @@ int main(int argc, char ** argv) {

     const auto handle_tokenize = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
         const json body = json::parse(req.body);

-        std::vector<llama_token> tokens;
+        json tokens_response = json::array();
         if (body.count("content") != 0) {
             const bool add_special = json_value(body, "add_special", false);
-            tokens = ctx_server.tokenize(body.at("content"), add_special);
+            const bool with_pieces = json_value(body, "with_pieces", false);
+            std::vector<llama_token> tokens = ctx_server.tokenize(body.at("content"), add_special);
+
+            if (with_pieces) {
+                for (const auto& token : tokens) {
+                    std::string piece = llama_token_to_piece(ctx_server.ctx, token);
+                    tokens_response.push_back({
+                        {"id", token},
+                        {"piece", piece}
+                    });
+                }
+            } else {
+                tokens_response = tokens;
+            }
         }

-        const json data = format_tokenizer_response(tokens);
+        const json data = format_tokenizer_response(tokens_response);
         return res.set_content(data.dump(), MIMETYPE_JSON);
     };
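For readers skimming the C++ above, the following Python sketch mirrors the same response shaping; `toy_tokenize` is a made-up stand-in for `ctx_server.tokenize` plus `llama_token_to_piece`, not the server's actual tokenizer:

```python
def toy_tokenize(content: str) -> list[tuple[int, str]]:
    # Toy stand-in: real IDs and pieces come from the C++ server;
    # these are fabricated for illustration.
    return [(hash(w) % 1000, w) for w in content.split()]

def build_tokenize_response(body: dict) -> dict:
    tokens_response = []
    if "content" in body:
        with_pieces = body.get("with_pieces", False)
        tokens = toy_tokenize(body["content"])
        if with_pieces:
            # One {id, piece} object per token
            tokens_response = [{"id": i, "piece": p} for i, p in tokens]
        else:
            # Just the raw IDs
            tokens_response = [i for i, _ in tokens]
    return {"tokens": tokens_response}

print(build_tokenize_response({"content": "Hello world", "with_pieces": True}))
```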
@@ -685,6 +685,32 @@ def step_tokenize_set_add_special(context):
     context.tokenize_add_special = True


+@step("tokenizing with pieces")
+@async_run_until_complete
+async def step_tokenize_with_pieces(context):
+    context.tokenized_text = context_text(context)
+    async with aiohttp.ClientSession() as session:
+        tokenize_args = {"content": context.tokenized_text, "with_pieces": True}
+        if getattr(context, "tokenize_add_special", None) is not None:
+            tokenize_args["add_special"] = context.tokenize_add_special
+
+        async with session.post(
+            f"{context.base_url}/tokenize", json=tokenize_args
+        ) as response:
+            assert response.status == 200
+            tokenize_json = await response.json()
+            context.tokens_with_pieces = tokenize_json["tokens"]
+
+
+@step("tokens with pieces are complete")
+@async_run_until_complete
+async def step_check_tokens_with_pieces(context):
+    # Verify that the response contains both token IDs and pieces
+    assert all(
+        "id" in token and "piece" in token for token in context.tokens_with_pieces
+    )
+
+
 @step('tokenizing')
 @async_run_until_complete
 async def step_tokenize(context):
@@ -583,7 +583,7 @@ static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
     return res;
 }

-static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
+static json format_tokenizer_response(const json & tokens) {
     return json {
         {"tokens", tokens}
     };
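This widened signature is what lets a single formatter serve both response shapes: it now wraps whatever JSON array it is handed, whether plain IDs or `{id, piece}` objects. A Python analogue of the idea, not the server's code:

```python
def format_tokenizer_response(tokens) -> dict:
    # Wraps any array, mirroring the widened `const json &`
    # parameter in utils.hpp above.
    return {"tokens": tokens}

print(format_tokenizer_response([123, 456, 789]))
print(format_tokenizer_response([{"id": 123, "piece": "Hello"}]))
```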