mirror of https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 04:00:16 +00:00

tool-call: add tests: tool_call=none, parallel_tool_calls=true

parent 168add7ec8
commit ec547e4137
@@ -746,6 +746,23 @@ async def step_tool_called(context, expected_name, expected_arguments):
     assert_n_tokens_predicted(result, tool_calls_check=check)
     assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"
 
+
+@step('receiving the following tool calls: {expected_tool_calls}')
+@async_run_until_complete
+async def step_receiving_tool_calls(context, expected_tool_calls):
+    expected_tool_calls = json.loads(expected_tool_calls)
+    n_completions = await gather_tasks_results(context)
+    assert n_completions > 0
+    for i in range(n_completions):
+        result = context.tasks_result.pop()
+
+        def check(tool_calls):
+            assert json.dumps(expected_tool_calls) == json.dumps(tool_calls), f"tool calls: {tool_calls}, expected: {expected_tool_calls}, result = {result}"
+
+        assert_n_tokens_predicted(result, tool_calls_check=check)
+    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"
+
+
 @step('no tool is called')
 @async_run_until_complete
 async def step_tool_called(context):
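The check closure in the new step compares the JSON serialization of the expected and actual tool calls, so the match is exact: names, argument values, ids and key order all have to agree. A minimal stand-alone sketch of that comparison (the tool-call data here is made up for illustration, not taken from the diff):

import json

# Hypothetical tool calls, shaped like the expected JSON used by the feature tests below.
expected_tool_calls = [{"arguments": {"code": "print('hello world')"}, "name": "ipython", "id": "123456789"}]
actual_tool_calls = [{"arguments": {"code": "print('hello world')"}, "name": "ipython", "id": "123456789"}]

# Same comparison the step performs: serialize both sides and compare the strings.
# json.dumps preserves dict insertion order, so a differing key order would fail the check.
assert json.dumps(expected_tool_calls) == json.dumps(actual_tool_calls), \
    f"tool calls: {actual_tool_calls}, expected: {expected_tool_calls}"
print("tool calls match")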
@@ -113,3 +113,35 @@ Feature: llama.cpp server
     And parallel tool calls is disabled
     And an OAI compatible chat completions request with no api error
     Then no tool is called
+
+
+  @slow
+  Scenario Outline: Python hello world w/ none tool_choice yields no tool call
+    Given a model file Phi-3.5-mini-instruct-Q4_K_M.gguf from HF repo bartowski/Phi-3.5-mini-instruct-GGUF
+    And no warmup
+    And the server is starting
+    And the server is healthy
+    And a model test
+    And 256 max tokens to predict
+    And a user prompt write a hello world in python
+    And a tool choice none
+    And python tool
+    And parallel tool calls is disabled
+    And an OAI compatible chat completions request with no api error
+    Then no tool is called
+
+
+  @slow
+  Scenario: Parallel tool calls
+    Given a model file Mistral-Nemo-Instruct-2407-Q8_0.gguf from HF repo bartowski/Mistral-Nemo-Instruct-2407-GGUF
+    And a test chat template file named mistralai-Mistral-Nemo-Instruct-2407
+    And no warmup
+    And the server is starting
+    And the server is healthy
+    And a model test
+    And 256 max tokens to predict
+    And a user prompt get the weather in paris and search for llama.cpp's latest commits
+    And python tool
+    And parallel tool calls is enabled
+    And an OAI compatible chat completions request with no api error
+    Then receiving the following tool calls: [{"arguments": {"code": "import requests\nresponse = requests.get('https://api.openweathermap.org/data/2.9/weather?q=Paris&appid=YOUR_API_KEY')\nprint(response.json())"}, "name": "ipython", "id": "123456789"}, {"arguments": {"code": "!git log --oneline --after 2024-01-01 --before 2024-12-31 llama.cpp"}, "name": "ipython", "id": "987654321"}]
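For orientation, here is a rough sketch of the kind of OAI-compatible request the steps above drive against the server. This is not the test harness's own code: the endpoint, port and the "ipython" tool schema are assumptions (the tool name and its "code" argument are inferred from the expected tool calls in the scenario above), and the field names follow the OpenAI chat completions API that the llama.cpp server emulates.

import json
import requests  # assumption: a plain HTTP client; the harness's helpers are not shown in this diff

# Payload roughly matching the "Parallel tool calls" scenario above.
# The tool_call=none scenario would instead set "tool_choice": "none" and
# "parallel_tool_calls": False, and expect a plain text answer with no tool calls.
payload = {
    "model": "test",
    "max_tokens": 256,
    "messages": [
        {"role": "user", "content": "get the weather in paris and search for llama.cpp's latest commits"},
    ],
    "tools": [{
        "type": "function",
        "function": {
            "name": "ipython",  # inferred from the expected tool calls; the schema itself is hypothetical
            "description": "Run Python code",
            "parameters": {
                "type": "object",
                "properties": {"code": {"type": "string"}},
                "required": ["code"],
            },
        },
    }],
    "parallel_tool_calls": True,  # the "parallel tool calls is enabled" step
}

# Assumed default local server address; adjust to wherever the server is listening.
response = requests.post("http://localhost:8080/v1/chat/completions", json=payload)
print(json.dumps(response.json(), indent=2))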