mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 12:10:18 +00:00
tool-call
: more eager function call parsing for Functionary & Llama (give a chance to 3B model)
This commit is contained in:
parent
8b2cf3509f
commit
7cef90cf9c
@ -57,6 +57,56 @@ static bool parse_json(std::string::const_iterator & it, const std::string::cons
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
|
||||
* Aggregates the prefix, suffix and in-between text into the content.
|
||||
*/
|
||||
static llama_tool_calls parse_json_tool_calls(const json & tools, const std::string& input, const std::regex & function_regex, const std::regex & close_regex, bool check_names) {
|
||||
std::smatch match;
|
||||
|
||||
llama_tool_calls result;
|
||||
auto end = input.end();
|
||||
auto it = input.begin();
|
||||
|
||||
std::unordered_set<std::string> tool_names;
|
||||
if (check_names) {
|
||||
for (const auto & tool : tools) {
|
||||
if (tool.contains("type") && tool["type"] == "function") {
|
||||
tool_names.insert(tool["function"]["name"]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (it != end) {
|
||||
std::sregex_iterator rend;
|
||||
std::sregex_iterator rit(it, end, function_regex);
|
||||
if (rit == rend) {
|
||||
result.content += std::string(it, end);
|
||||
break;
|
||||
}
|
||||
auto name = rit->str(1);
|
||||
if (check_names && tool_names.find(name) == tool_names.end()) {
|
||||
result.content += std::string(it, rit->suffix().first);
|
||||
break;
|
||||
}
|
||||
|
||||
result.content += std::string(it, rit->prefix().second);
|
||||
it = rit->suffix().first;
|
||||
|
||||
|
||||
json arguments;
|
||||
if (!parse_json(it, end, arguments)) {
|
||||
throw std::runtime_error("Failed to parse json tool call arguments");
|
||||
}
|
||||
if (!std::regex_search(it, end, match, close_regex)) {
|
||||
throw std::runtime_error("Malformed input, missing closing pattern");
|
||||
}
|
||||
it = match.suffix().first;
|
||||
result.tool_calls.push_back({name, arguments.dump()});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static llama_tool_calls parse_hermes_tool_calls(const std::string& input) {
|
||||
try {
|
||||
std::regex start_pattern(R"([\n\s]*<tool_call>)");
|
||||
@ -100,81 +150,21 @@ static llama_tool_calls parse_hermes_tool_calls(const std::string& input) {
|
||||
}
|
||||
}
|
||||
|
||||
static llama_tool_calls parse_llama_3_1_tool_calls(const json & tools, const std::string& input) {
|
||||
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
||||
std::smatch match;
|
||||
if (std::regex_search(input, match, python_tag_regex)) {
|
||||
return {
|
||||
match.prefix().str(), {
|
||||
{"ipython", (json {{"code", match[1].str()}}).dump()},
|
||||
}
|
||||
};
|
||||
}
|
||||
try {
|
||||
auto call = json::parse(input);
|
||||
// Only treat JSON as a tool call if it has a name attribute that matches any of the tools specified in the request.
|
||||
// There doesn't seem to be any better way to detect a tool call.
|
||||
if (call.contains("name") && call["name"].is_string()) {
|
||||
std::string name = call["name"];
|
||||
for (const auto & tool : tools) {
|
||||
if (tool.at("function").at("name") == name) {
|
||||
return {
|
||||
"",
|
||||
{
|
||||
{name, call["parameters"].dump()},
|
||||
}
|
||||
};
|
||||
static llama_tool_calls parse_llama_3_tool_calls(const json & tools, const std::string& input, bool allow_python_tag) {
|
||||
if (allow_python_tag) {
|
||||
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
||||
std::smatch match;
|
||||
if (std::regex_search(input, match, python_tag_regex)) {
|
||||
return {
|
||||
match.prefix().str(), {
|
||||
{"ipython", (json {{"code", match[1].str()}}).dump()},
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (const std::exception & e) {
|
||||
// Do nothing
|
||||
}
|
||||
return {input, {}};
|
||||
}
|
||||
|
||||
static llama_tool_calls parse_functionary_tool_calls(const json & tools, const std::string& input, const std::regex & function_regex, const std::regex & close_regex) {
|
||||
std::smatch match;
|
||||
|
||||
llama_tool_calls result;
|
||||
auto end = input.end();
|
||||
auto it = input.begin();
|
||||
|
||||
std::unordered_set<std::string> tool_names;
|
||||
for (const auto & tool : tools) {
|
||||
if (tool.contains("type") && tool["type"] == "function") {
|
||||
tool_names.insert(tool["function"]["name"]);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
while (it != end) {
|
||||
std::sregex_iterator rend;
|
||||
std::sregex_iterator rit(it, end, function_regex);
|
||||
if (rit == rend) {
|
||||
result.content += std::string(it, end);
|
||||
break;
|
||||
}
|
||||
auto name = rit->str(1);
|
||||
if (tool_names.find(name) == tool_names.end()) {
|
||||
result.content += std::string(it, rit->suffix().first);
|
||||
break;
|
||||
}
|
||||
|
||||
result.content += std::string(it, rit->prefix().second);
|
||||
it = rit->suffix().first;
|
||||
|
||||
|
||||
json arguments;
|
||||
if (!parse_json(it, end, arguments)) {
|
||||
throw std::runtime_error("Failed to parse json tool call arguments");
|
||||
}
|
||||
if (!std::regex_search(it, end, match, close_regex)) {
|
||||
throw std::runtime_error("Malformed input, missing closing pattern");
|
||||
}
|
||||
it = match.suffix().first;
|
||||
result.tool_calls.push_back({name, arguments.dump()});
|
||||
}
|
||||
return result;
|
||||
static std::regex function_regex("(?:^|\\n)\\{\"name\": \"([^\"]+)\", \"parameters\": ");
|
||||
static std::regex close_regex("\\}");
|
||||
return parse_json_tool_calls(tools, input, function_regex, close_regex, /* check_names= */ false);
|
||||
}
|
||||
|
||||
static llama_tool_calls parse_functionary_v3_llama_3_1_tool_calls(const json & tools, const std::string& input) {
|
||||
@ -190,19 +180,21 @@ static llama_tool_calls parse_functionary_v3_llama_3_1_tool_calls(const json & t
|
||||
}
|
||||
static std::regex function_regex(R"(<function=(\w+)>)");
|
||||
static std::regex close_regex(R"(</function>)");
|
||||
return parse_functionary_tool_calls(tools, input, function_regex, close_regex);
|
||||
return parse_json_tool_calls(tools, input, function_regex, close_regex, /* check_names= */ false);
|
||||
}
|
||||
|
||||
static llama_tool_calls parse_functionary_v3_tool_calls(const json & tools, const std::string& input) {
|
||||
static std::regex function_regex(R"((?:>>>)?(\w+)\n)");
|
||||
static std::regex close_regex(R"($|\n(?=>>>))");
|
||||
return parse_functionary_tool_calls(tools, input, function_regex, close_regex);
|
||||
static std::regex close_regex(R"($|(?=>>>))");
|
||||
return parse_json_tool_calls(tools, input, function_regex, close_regex, /* check_names= */ true);
|
||||
}
|
||||
|
||||
llama_tool_calls parse_tool_calls(llama_tool_call_style style, const json & tools, const std::string& input) {
|
||||
switch (style) {
|
||||
case llama_tool_call_style::Llama31:
|
||||
return parse_llama_3_1_tool_calls(tools, input);
|
||||
return parse_llama_3_tool_calls(tools, input, /* parse_llama_3_tool_calls= */ true);
|
||||
case llama_tool_call_style::Llama32:
|
||||
return parse_llama_3_tool_calls(tools, input, /* parse_llama_3_tool_calls= */ false);
|
||||
case llama_tool_call_style::FunctionaryV3Llama3:
|
||||
return parse_functionary_v3_tool_calls(tools, input);
|
||||
case llama_tool_call_style::FunctionaryV3Llama31:
|
||||
@ -224,9 +216,19 @@ llama_tool_call_handler llama_tool_call_handler_init(
|
||||
llama_tool_call_handler handler;
|
||||
|
||||
switch (tmpl.tool_call_style()) {
|
||||
case llama_tool_call_style::Llama31: {
|
||||
case llama_tool_call_style::Llama31:
|
||||
case llama_tool_call_style::Llama32: {
|
||||
static auto builtin_tools = json {"wolfram_alpha", "brave_search"};
|
||||
|
||||
auto uses_python_tag = tmpl.tool_call_style() == llama_tool_call_style::Llama31;
|
||||
|
||||
// Technically we should only trigger on `"\n{\"name\": \"" + name + "\""` for each tool name,
|
||||
// but Llama-3.2-3B struggles to output valid tool calls so we're "guiding" it strongly as soon
|
||||
// as it seems to be outputting some JSON.
|
||||
// TODO: make this conditional on a very small model (e.g. 1B / 3B).
|
||||
auto eagerly_match_any_json = true;
|
||||
|
||||
handler.grammar = build_grammar([&](const llama_grammar_builder & builder) {
|
||||
static std::vector<std::string> builtin_tools {"wolfram_alpha", "brave_search"};
|
||||
std::vector<std::string> tool_rules;
|
||||
|
||||
for (const auto & tool : tools) {
|
||||
@ -234,7 +236,7 @@ llama_tool_call_handler llama_tool_call_handler_init(
|
||||
std::string name = function["name"];
|
||||
auto parameters = function["parameters"];
|
||||
builder.resolve_refs(parameters);
|
||||
if (name == "ipython" || std::find(builtin_tools.begin(), builtin_tools.end(), name) != builtin_tools.end()) {
|
||||
if (uses_python_tag && (name == "ipython" || builtin_tools.contains(name))) {
|
||||
tool_rules.push_back(builder.add_rule("ipython-call", "\"<|python_tag|>\" .*"));
|
||||
if (allow_content) {
|
||||
handler.grammar_trigger_words.push_back("<|python_tag|>");
|
||||
@ -244,15 +246,20 @@ llama_tool_call_handler llama_tool_call_handler_init(
|
||||
tool_rules.push_back(
|
||||
builder.add_rule(
|
||||
name + "-call",
|
||||
"\"\\n{\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
|
||||
"\"\\n\"? \"{\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
|
||||
builder.add_schema(name + "-args", parameters) +
|
||||
" \"}\""));
|
||||
if (allow_content) {
|
||||
if (allow_content && !eagerly_match_any_json) {
|
||||
handler.grammar_trigger_words.push_back("\n{\"name\": \"" + name + "\"");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (allow_content && eagerly_match_any_json) {
|
||||
handler.grammar_trigger_words.push_back("\n{\"");
|
||||
handler.grammar_trigger_words.push_back("{\"");
|
||||
}
|
||||
|
||||
builder.add_rule("root", join(tool_rules.begin(), tool_rules.end(), " | "));
|
||||
});
|
||||
handler.additional_stop_words.push_back("<|eom_id|>");
|
||||
@ -274,7 +281,7 @@ llama_tool_call_handler llama_tool_call_handler_init(
|
||||
auto parameters = function["parameters"];
|
||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||
first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
|
||||
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\"\\n>>>" + name + "\\n\" " + args_rule));
|
||||
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
|
||||
if (allow_content) {
|
||||
handler.grammar_trigger_words.push_back(name + "\n");
|
||||
handler.grammar_trigger_words.push_back(">>>" + name + "\n");
|
||||
|
@ -2,42 +2,47 @@
|
||||
|
||||
- Install prerequisite: [uv](https://docs.astral.sh/uv/) (used to simplify python deps)
|
||||
|
||||
- Run `llama-server` w/ jinja templates:
|
||||
- Run `llama-server` w/ jinja templates. Note that most models need a template override (the HF to GGUF conversion only retains a single `chat_template`, but sometimes the models only support tool calls in an alternative chat template).
|
||||
|
||||
```bash
|
||||
make -j LLAMA_CURL=1 llama-server
|
||||
./llama-server \
|
||||
--jinja -fa \
|
||||
-mu https://huggingface.co/lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Instructions for NousResearch/Hermes-2-Pro-Llama-3-8B (needs template override)</summary>
|
||||
|
||||
The HF model had two variants for its chat template (`default` and `tool_use`), but the GGUF only retained the `default` one.
|
||||
|
||||
```bash
|
||||
./llama-server \
|
||||
--jinja -fa \
|
||||
-mu https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q8_0.gguf \
|
||||
# Nous Hermes 2 Pro Llama 3 8B
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-hfr NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF -hff Hermes-2-Pro-Llama-3-8B-Q8_0.gguf \
|
||||
--chat-template-file tests/chat/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
|
||||
```
|
||||
`
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Instructions for meetkai/functionary-small-v3.2 (needs template override)</summary>
|
||||
# Llama 3.1 8B
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-hfr lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF -hff Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf
|
||||
|
||||
The template in the GGUF doesn't support tool calls, but its bigger brother's template can be used:
|
||||
|
||||
```bash
|
||||
./llama-server \
|
||||
--jinja -fa \
|
||||
-mu https://huggingface.co/meetkai/functionary-small-v3.2-GGUF/resolve/main/functionary-small-v3.2.Q4_0.gguf \
|
||||
# functionary-small-v3
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-hfr meetkai/functionary-small-v3.2-GGUF -hff functionary-small-v3.2.Q4_0.gguf \
|
||||
--chat-template-file tests/chat/templates/meetkai-functionary-medium-v3.2.jinja
|
||||
```
|
||||
|
||||
</details>
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-m ~/Downloads/functionary-small-v3.2.Q4_0.gguf \
|
||||
--chat-template-file tests/chat/templates/meetkai-functionary-medium-v3.2.jinja
|
||||
|
||||
# Llama 3.2 3B (poor adherence)
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-hfr lmstudio-community/Llama-3.2-3B-Instruct-GGUF -hff Llama-3.2-3B-Instruct-Q6_K_L.gguf \
|
||||
--chat-template-file tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
|
||||
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-m ~/Downloads/Llama-3.2-3B-Instruct-Q6_K_L.gguf \
|
||||
--chat-template-file tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
|
||||
|
||||
# Llama 3.2 1B (very poor adherence)
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-hfr lmstudio-community/Llama-3.2-1B-Instruct-GGUF -hff Llama-3.2-1B-Instruct-Q4_K_M.gguf \
|
||||
--chat-template-file tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
|
||||
|
||||
# Llama 3.1 70B (untested)
|
||||
./llama-server --jinja -fa --verbose \
|
||||
-hfr lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF -hff Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
|
||||
```
|
||||
|
||||
- Run some tools inside a docker container (check http://localhost:8088/docs once running):
|
||||
|
||||
@ -57,3 +62,7 @@
|
||||
--tool-endpoint http://localhost:8088 \
|
||||
--goal "What is the sum of 2535 squared and 32222000403?"
|
||||
```
|
||||
|
||||
## TODO
|
||||
|
||||
- Implement code_interpreter using whichever tools are builtin for a given model.
|
||||
|
@ -35,7 +35,9 @@ Feature: llama.cpp server
|
||||
| meetkai-functionary-medium-v3.2 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
|
||||
| meetkai-functionary-medium-v3.2 | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
|
||||
| meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
|
||||
| meta-llama-Meta-Llama-3.1-8B-Instruct | 16 | ipython | {"code": "it and "} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
|
||||
| meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Dasty, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
|
||||
| meta-llama-Llama-3.2-3B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
|
||||
| meta-llama-Llama-3.2-3B-Instruct | 64 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
|
||||
|
||||
|
||||
Scenario Outline: OAI Compatibility w/ tools and auto tool_choice
|
||||
|
Loading…
Reference in New Issue
Block a user