mirror of https://github.com/ggerganov/llama.cpp.git, synced 2024-12-25 02:44:36 +00:00
server : add /detokenize endpoint (#2802)

* Add a /detokenize endpoint to the example server
* remove trailing white-space
This commit is contained in:
parent 730d9c681e
commit c1ac54b77a
@@ -164,6 +164,12 @@ node index.js
 
     Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
 
+- **POST** `/detokenize`: Convert tokens to text.
+
+    *Options:*
+
+    `tokens`: Set the tokens to detokenize.
+
 - **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.
 
     *Options:*
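For illustration, a minimal C++ client sketch for the new endpoint, using cpp-httplib and nlohmann::json (the libraries the example server itself builds on). The host, port, and token ids here are assumptions, not part of the commit; token ids depend on the loaded model's vocabulary.

```cpp
// Hypothetical client for POST /detokenize -- assumes a server listening
// on 127.0.0.1:8080; the token ids below are placeholders only.
#include <cstdio>
#include <string>
#include "httplib.h"
#include "json.hpp"

using json = nlohmann::json;

int main() {
    httplib::Client cli("127.0.0.1", 8080);

    // `tokens` is the only documented option: the token ids to detokenize.
    const json body = {{"tokens", {1, 15043, 3186}}};

    auto res = cli.Post("/detokenize", body.dump(), "application/json");
    if (res && res->status == 200) {
        // The server replies with {"content": "<detokenized text>"}.
        const json data = json::parse(res->body);
        std::printf("%s\n", data["content"].get<std::string>().c_str());
    }
    return 0;
}
```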
@@ -1104,6 +1104,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
         {"tokens", tokens}};
 }
 
+static json format_detokenized_response(std::string content)
+{
+    return json{
+        {"content", content}};
+}
+
 template <typename T>
 static T json_value(const json &body, const std::string &key, const T &default_value)
 {
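As a sanity check on the helper's output, here is a small standalone sketch (assuming only nlohmann::json, which server.cpp already uses) showing the exact JSON the new function serializes to:

```cpp
// Standalone sketch: verify the response shape of the new helper.
#include <cassert>
#include <string>
#include "json.hpp"

using json = nlohmann::json;

// Copied from the diff above: wraps a string in {"content": ...}.
static json format_detokenized_response(std::string content) {
    return json{{"content", content}};
}

int main() {
    const json data = format_detokenized_response("Hello world");
    assert(data.dump() == R"({"content":"Hello world"})");
    return 0;
}
```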
@@ -1501,6 +1507,21 @@ int main(int argc, char **argv)
                 const json data = format_tokenizer_response(tokens);
                 return res.set_content(data.dump(), "application/json"); });
 
+    svr.Post("/detokenize", [&llama](const Request &req, Response &res)
+             {
+                 auto lock = llama.lock();
+
+                 const json body = json::parse(req.body);
+                 std::string content;
+                 if (body.count("tokens") != 0)
+                 {
+                     const std::vector<llama_token> tokens = body["tokens"];
+                     content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend());
+                 }
+
+                 const json data = format_detokenized_response(content);
+                 return res.set_content(data.dump(), "application/json"); });
+
     svr.Post("/embedding", [&llama](const Request &req, Response &res)
              {
                  auto lock = llama.lock();
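Note that the handler treats a missing `tokens` field as an empty list: `content` stays empty and the endpoint responds with `{"content": ""}` rather than an error.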