tool-call: greedy sampling in server tests + tweak prompt

ochafik 2024-10-31 04:38:22 +00:00
parent be9de3ed8a
commit 542853b34b
2 changed files with 24 additions and 15 deletions

View File

@@ -93,6 +93,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
context.warmup = True
context.use_jinja = False
context.chat_template_file = None
context.greedy_sampling = False
# infill
context.infill_input_extra = None
@@ -190,6 +191,11 @@ def step_no_warmup(context):
context.warmup = False
@step('greedy sampling')
def step_greedy_sampling(context):
context.greedy_sampling = True
@step('a chat template file {file}')
def step_chat_template_file(context, file):
context.chat_template_file = file
@@ -446,13 +452,13 @@ def step_python_tool(context):
"type": "function",
"function": {
"name": "ipython",
"description": "",
"description": "Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.",
"parameters": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": ""
"description": "The code to run in the ipython interpreter."
}
},
"required": ["code"]
@@ -1658,6 +1664,8 @@ def start_server_background(context):
server_args.extend(['--lora', context.lora_file])
if context.disable_ctx_shift:
server_args.extend(['--no-context-shift'])
if context.greedy_sampling:
server_args.extend(['--samplers', 'top-k', '--top-k', '1'])
if not context.warmup:
server_args.extend(['--no-warmup'])
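
When greedy sampling is requested, the server is launched with --samplers top-k --top-k 1: restricting the candidate set to the single most likely token makes decoding deterministic, which is what lets the feature file below pin exact tool-call arguments. A minimal sketch of why top-k with k = 1 behaves like greedy (argmax) decoding; the token names and logit values are purely illustrative:

# Illustrative only: top-k filtering with k=1 keeps just the argmax token,
# so the subsequent sampling step has exactly one possible outcome.
def top_k_filter(logits: dict[str, float], k: int) -> dict[str, float]:
    return dict(sorted(logits.items(), key=lambda kv: kv[1], reverse=True)[:k])

logits = {"print": 2.1, "say": 1.3, "hello": 0.4}  # made-up values
survivors = top_k_filter(logits, k=1)
assert list(survivors) == ["print"]  # only the highest-logit token remains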

View File

@@ -6,6 +6,7 @@ Feature: llama.cpp server
Given a server listening on localhost:8080
And BOS token is 1
And 42 as server seed
And greedy sampling
And 8192 KV cache size
And 32 as batch size
And 1 slots
@@ -20,7 +21,7 @@ Feature: llama.cpp server
And the server is healthy
And a model test
And <n_predict> max tokens to predict
And a user prompt write a hello world in python
And a user prompt say hello world with python
And a tool choice required
And tools <tools>
And parallel tool calls is <parallel_tool_calls>
@@ -38,11 +39,11 @@ Feature: llama.cpp server
| NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
| NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
| meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
| meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Dasty, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
| meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Daisy, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
| meta-llama-Llama-3.2-3B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
| meta-llama-Llama-3.2-3B-Instruct | 64 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
| mistralai-Mistral-Nemo-Instruct-2407 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
| mistralai-Mistral-Nemo-Instruct-2407 | 128 | ipython | {"code": "It's a small cat."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
| mistralai-Mistral-Nemo-Instruct-2407 | 128 | ipython | {"code": "It's a spector."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
Scenario Outline: Template <template_name> + tinystories model yields no tool call
@@ -52,7 +53,7 @@ Feature: llama.cpp server
And the server is healthy
And a model test
And <n_predict> max tokens to predict
And a user prompt write a hello world in python
And a user prompt say hello world with python
And tools [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}]
And an OAI compatible chat completions request with no api error
Then no tool is called
@@ -71,7 +72,7 @@ Feature: llama.cpp server
And the server is healthy
And a model test
And 16 max tokens to predict
And a user prompt write a hello world in python
And a user prompt say hello world with python
And tools []
And an OAI compatible chat completions request with no api error
Then no tool is called
@@ -86,7 +87,7 @@ Feature: llama.cpp server
And the server is healthy
And a model test
And 256 max tokens to predict
And a user prompt write a hello world in python
And a user prompt say hello world with python
And python tool
And parallel tool calls is disabled
And an OAI compatible chat completions request with no api error
@@ -94,16 +95,16 @@ Feature: llama.cpp server
Examples: Prompts
| tool_name | tool_arguments | hf_repo | hf_file | template_override |
| ipython | {"code": "print('Hello, World!')"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | |
| ipython | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
| ipython | {"code": "print('Hello, World!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
| ipython | {"code": "print('Hello World!')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use |
| ipython | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q4_K_M.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
| ipython | {"code": "print(\"Hello World\")"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | |
| ipython | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
| ipython | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
| ipython | {"code": "print('hello world')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use |
| ipython | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
| ipython | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
| ipython | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf | |
| ipython | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 |
# | ipython | {"code": "print('Hello, world!')"} | bartowski/gemma-2-2b-it-GGUF | gemma-2-2b-it-Q4_K_M.gguf | |
# | ipython | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 |
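
The Examples rows above pin the exact tool name and JSON arguments each model is expected to emit now that decoding is greedy. A rough sketch of how such an expectation could be checked; the assert_tool_call helper below is hypothetical, not the step implementation used by the suite:

import json

def assert_tool_call(actual_call: dict, expected_name: str, expected_arguments: str):
    # Hypothetical helper: compare the tool name, then compare arguments as
    # parsed JSON so trivial formatting differences do not cause false failures.
    assert actual_call["function"]["name"] == expected_name
    assert json.loads(actual_call["function"]["arguments"]) == json.loads(expected_arguments)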
@slow
@@ -114,7 +115,7 @@ Feature: llama.cpp server
And the server is healthy
And a model test
And 256 max tokens to predict
And a user prompt write a hello world in python
And a user prompt say hello world with python
And parallel tool calls is disabled
And an OAI compatible chat completions request with no api error
Then no tool is called
@@ -128,7 +129,7 @@ Feature: llama.cpp server
And the server is healthy
And a model test
And 256 max tokens to predict
And a user prompt write a hello world in python
And a user prompt say hello world with python
And a tool choice none
And python tool
And parallel tool calls is disabled