server : add /detokenize endpoint (#2802)

* Add a /detokenize endpoint to the example server

* remove trailing white-space
This commit is contained in:
Bruce MacDonald 2023-08-26 16:11:45 -07:00 committed by GitHub
parent 730d9c681e
commit c1ac54b77a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 0 deletions

View File

@ -164,6 +164,12 @@ node index.js
Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
- **POST** `/detokenize`: Convert tokens to text.
*Options:*
`tokens`: Set the tokens to detokenize.
- **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.
*Options:*

View File

@ -1104,6 +1104,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
        {"tokens", tokens}};
}
// Build the JSON response body for the /detokenize endpoint.
//
// Takes the detokenized text by value (interface unchanged) and moves it
// into the JSON object, avoiding a second copy of a potentially long string.
static json format_detokenized_response(std::string content)
{
    return json{
        {"content", std::move(content)}};
}
template <typename T>
static T json_value(const json &body, const std::string &key, const T &default_value)
{
@ -1501,6 +1507,21 @@ int main(int argc, char **argv)
const json data = format_tokenizer_response(tokens);
return res.set_content(data.dump(), "application/json"); });
// POST /detokenize: turn a JSON array of token ids ("tokens") back into text.
// An absent "tokens" field yields an empty "content" string in the response.
svr.Post("/detokenize", [&llama](const Request &req, Response &res)
         {
             auto lock = llama.lock();

             const json body = json::parse(req.body);

             std::string text;
             const auto tokens_it = body.find("tokens");
             if (tokens_it != body.end())
             {
                 const std::vector<llama_token> token_ids = *tokens_it;
                 text = tokens_to_str(llama.ctx, token_ids.cbegin(), token_ids.cend());
             }

             const json response = format_detokenized_response(text);
             return res.set_content(response.dump(), "application/json"); });
svr.Post("/embedding", [&llama](const Request &req, Response &res)
{
auto lock = llama.lock();