Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2025-01-13 04:00:16 +00:00
tool-call: add tests: tool_call=none, parallel_tool_calls=true
This commit is contained in:
parent 168add7ec8
commit ec547e4137
@@ -746,6 +746,23 @@ async def step_tool_called(context, expected_name, expected_arguments):
        assert_n_tokens_predicted(result, tool_calls_check=check)
    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"


@step('receiving the following tool calls: {expected_tool_calls}')
@async_run_until_complete
async def step_receiving_tool_calls(context, expected_tool_calls):
    expected_tool_calls = json.loads(expected_tool_calls)
    n_completions = await gather_tasks_results(context)
    assert n_completions > 0

    for i in range(n_completions):
        result = context.tasks_result.pop()

        def check(tool_calls):
            assert json.dumps(expected_tool_calls) == json.dumps(tool_calls), f"tool calls: {tool_calls}, expected: {expected_tool_calls}, result = {result}"

        assert_n_tokens_predicted(result, tool_calls_check=check)
    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"


@step('no tool is called')
@async_run_until_complete
async def step_tool_called(context):
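The new step parses the expected tool calls from the Gherkin step text and compares them, via json.dumps, against the tool calls pulled out of each completion. A minimal standalone sketch of that comparison, assuming an OpenAI-style /v1/chat/completions response body and a hypothetical extract_tool_calls helper (neither is taken from the suite's actual step helpers):

import json

# Hypothetical response body in the OpenAI chat-completions format; real values
# would come from the llama.cpp server under test.
response = {
    "choices": [{
        "message": {
            "role": "assistant",
            "content": None,
            "tool_calls": [{
                "id": "123456789",
                "type": "function",
                "function": {
                    "name": "ipython",
                    "arguments": "{\"code\": \"print('hello world')\"}",
                },
            }],
        },
    }],
}


def extract_tool_calls(resp):
    # Flatten the OAI structure into the {"arguments", "name", "id"} shape used by
    # the feature file's expected_tool_calls (an assumption about the suite's format).
    calls = []
    for tc in resp["choices"][0]["message"].get("tool_calls", []):
        calls.append({
            "arguments": json.loads(tc["function"]["arguments"]),
            "name": tc["function"]["name"],
            "id": tc["id"],
        })
    return calls


expected = [{"arguments": {"code": "print('hello world')"}, "name": "ipython", "id": "123456789"}]
actual = extract_tool_calls(response)
# Same order-sensitive string comparison the step uses.
assert json.dumps(expected) == json.dumps(actual), f"tool calls: {actual}, expected: {expected}"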
@@ -113,3 +113,35 @@ Feature: llama.cpp server
    And parallel tool calls is disabled
    And an OAI compatible chat completions request with no api error
    Then no tool is called

  @slow
  Scenario Outline: Python hello world w/ none tool_choice yields no tool call
    Given a model file Phi-3.5-mini-instruct-Q4_K_M.gguf from HF repo bartowski/Phi-3.5-mini-instruct-GGUF
    And no warmup
    And the server is starting
    And the server is healthy
    And a model test
    And 256 max tokens to predict
    And a user prompt write a hello world in python
    And a tool choice none
    And python tool
    And parallel tool calls is disabled
    And an OAI compatible chat completions request with no api error
    Then no tool is called

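As an illustration of what this scenario presumably drives on the wire, here is a rough sketch of an OAI-compatible request with tool_choice set to none against a locally running server; the endpoint, port, and tool schema are assumptions, not values taken from the test suite:

import requests  # assumes the requests package is available

# Hypothetical llama.cpp server endpoint; the test suite starts its own instance.
url = "http://localhost:8080/v1/chat/completions"

payload = {
    "model": "test",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "write a hello world in python"}],
    # A python/ipython-style tool definition, sketched after common tool-call examples.
    "tools": [{
        "type": "function",
        "function": {
            "name": "ipython",
            "description": "Run Python code",
            "parameters": {
                "type": "object",
                "properties": {"code": {"type": "string"}},
                "required": ["code"],
            },
        },
    }],
    # tool_choice=none: the model must not call any tool, so the scenario
    # expects a plain text completion and "no tool is called".
    "tool_choice": "none",
    "parallel_tool_calls": False,
}

response = requests.post(url, json=payload, timeout=60)
message = response.json()["choices"][0]["message"]
assert not message.get("tool_calls"), f"expected no tool calls, got {message.get('tool_calls')}"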
  @slow
  Scenario: Parallel tool calls
    Given a model file Mistral-Nemo-Instruct-2407-Q8_0.gguf from HF repo bartowski/Mistral-Nemo-Instruct-2407-GGUF
    And a test chat template file named mistralai-Mistral-Nemo-Instruct-2407
    And no warmup
    And the server is starting
    And the server is healthy
    And a model test
    And 256 max tokens to predict
    And a user prompt get the weather in paris and search for llama.cpp's latest commits
    And python tool
    And parallel tool calls is enabled
    And an OAI compatible chat completions request with no api error
    Then receiving the following tool calls: [{"arguments": {"code": "import requests\nresponse = requests.get('https://api.openweathermap.org/data/2.9/weather?q=Paris&appid=YOUR_API_KEY')\nprint(response.json())"}, "name": "ipython", "id": "123456789"}, {"arguments": {"code": "!git log --oneline --after 2024-01-01 --before 2024-12-31 llama.cpp"}, "name": "ipython", "id": "987654321"}]
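For the parallel case, the request differs mainly in enabling parallel tool calls, and the assertion expects two tool calls (a weather lookup and a git log search) in a single assistant message. A rough sketch under the same assumptions as the request sketch above (local server, OAI-style fields, illustrative tool schema):

import requests  # same assumptions as above: local server, OAI-compatible API

payload = {
    "model": "test",
    "max_tokens": 256,
    "messages": [{"role": "user",
                  "content": "get the weather in paris and search for llama.cpp's latest commits"}],
    "tools": [{  # same hypothetical ipython tool definition as above
        "type": "function",
        "function": {
            "name": "ipython",
            "parameters": {"type": "object",
                           "properties": {"code": {"type": "string"}},
                           "required": ["code"]},
        },
    }],
    # With parallel tool calls enabled, the scenario expects the model to emit
    # two tool calls, one per sub-task, in the same assistant message.
    "parallel_tool_calls": True,
}

resp = requests.post("http://localhost:8080/v1/chat/completions", json=payload, timeout=60)
tool_calls = resp.json()["choices"][0]["message"].get("tool_calls", [])
assert len(tool_calls) == 2, f"expected 2 parallel tool calls, got {len(tool_calls)}"
assert all(tc["function"]["name"] == "ipython" for tc in tool_calls)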