tool-call: greedy sampling in server tests + tweak prompt

ochafik 2024-10-31 04:38:22 +00:00
parent be9de3ed8a
commit 542853b34b
2 changed files with 24 additions and 15 deletions
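Note (editorial, not part of the commit): pinning the sampler chain to top-k with k = 1 is what makes decoding greedy; only the highest-probability token survives the filter, so the expected tool-call arguments in the tests below become reproducible. A minimal, self-contained Python sketch of that equivalence (illustrative only, not llama.cpp code):

import math
import random

def top_k_sample(logits: dict[str, float], k: int) -> str:
    """Keep the k highest-logit tokens, then sample among them by softmax weight."""
    survivors = sorted(logits, key=logits.get, reverse=True)[:k]
    weights = [math.exp(logits[t]) for t in survivors]
    return random.choices(survivors, weights=weights)[0]

logits = {"print": 3.1, "say": 2.7, "hello": 0.4}
# With k = 1 only the argmax token survives, so every draw returns the same token:
assert all(top_k_sample(logits, k=1) == "print" for _ in range(100))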

@@ -93,6 +93,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
     context.warmup = True
     context.use_jinja = False
     context.chat_template_file = None
+    context.greedy_sampling = False

     # infill
     context.infill_input_extra = None
@@ -190,6 +191,11 @@ def step_no_warmup(context):
     context.warmup = False


+@step('greedy sampling')
+def step_greedy_sampling(context):
+    context.greedy_sampling = True
+
+
 @step('a chat template file {file}')
 def step_chat_template_file(context, file):
     context.chat_template_file = file
@@ -446,13 +452,13 @@ def step_python_tool(context):
         "type": "function",
         "function": {
             "name": "ipython",
-            "description": "",
+            "description": "Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "code": {
                         "type": "string",
-                        "description": ""
+                        "description": "The code to run in the ipython interpreter."
                     }
                 },
                 "required": ["code"]
@@ -1658,6 +1664,8 @@ def start_server_background(context):
         server_args.extend(['--lora', context.lora_file])
     if context.disable_ctx_shift:
         server_args.extend(['--no-context-shift'])
+    if context.greedy_sampling:
+        server_args.extend(['--samplers', 'top-k', '--top-k', '1'])
     if not context.warmup:
         server_args.extend(['--no-warmup'])
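For readers skimming the diff, the wiring above can be paraphrased as a standalone sketch (the Ctx class and helper names below are stand-ins, not the actual behave harness): the new 'greedy sampling' step flips a flag on the test context, and start_server_background() turns that flag into sampler CLI arguments.

class Ctx:
    # Stand-in for behave's context object; only the field this commit adds.
    greedy_sampling = False

def step_greedy_sampling(context):
    # Mirrors the new @step('greedy sampling') definition.
    context.greedy_sampling = True

def sampler_args(context):
    # Mirrors the new branch in start_server_background().
    args = []
    if context.greedy_sampling:
        # top-k with k=1 keeps only the single best token, i.e. greedy decoding
        args.extend(['--samplers', 'top-k', '--top-k', '1'])
    return args

ctx = Ctx()
step_greedy_sampling(ctx)
assert sampler_args(ctx) == ['--samplers', 'top-k', '--top-k', '1']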

@@ -6,6 +6,7 @@ Feature: llama.cpp server
     Given a server listening on localhost:8080
     And BOS token is 1
     And 42 as server seed
+    And greedy sampling
     And 8192 KV cache size
     And 32 as batch size
     And 1 slots
@@ -20,7 +21,7 @@ Feature: llama.cpp server
     And the server is healthy
     And a model test
     And <n_predict> max tokens to predict
-    And a user prompt write a hello world in python
+    And a user prompt say hello world with python
     And a tool choice required
     And tools <tools>
     And parallel tool calls is <parallel_tool_calls>
@@ -38,11 +39,11 @@ Feature: llama.cpp server
       | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
       | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
       | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
-      | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Dasty, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
+      | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Daisy, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
       | meta-llama-Llama-3.2-3B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
       | meta-llama-Llama-3.2-3B-Instruct | 64 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
       | mistralai-Mistral-Nemo-Instruct-2407 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled |
-      | mistralai-Mistral-Nemo-Instruct-2407 | 128 | ipython | {"code": "It's a small cat."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |
+      | mistralai-Mistral-Nemo-Instruct-2407 | 128 | ipython | {"code": "It's a spector."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled |

   Scenario Outline: Template <template_name> + tinystories model yields no tool call
@@ -52,7 +53,7 @@ Feature: llama.cpp server
     And the server is healthy
     And a model test
     And <n_predict> max tokens to predict
-    And a user prompt write a hello world in python
+    And a user prompt say hello world with python
     And tools [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}]
     And an OAI compatible chat completions request with no api error
     Then no tool is called
@@ -71,7 +72,7 @@ Feature: llama.cpp server
     And the server is healthy
     And a model test
     And 16 max tokens to predict
-    And a user prompt write a hello world in python
+    And a user prompt say hello world with python
     And tools []
     And an OAI compatible chat completions request with no api error
     Then no tool is called
@@ -86,7 +87,7 @@ Feature: llama.cpp server
     And the server is healthy
     And a model test
     And 256 max tokens to predict
-    And a user prompt write a hello world in python
+    And a user prompt say hello world with python
     And python tool
     And parallel tool calls is disabled
     And an OAI compatible chat completions request with no api error
@@ -94,16 +95,16 @@ Feature: llama.cpp server
     Examples: Prompts
       | tool_name | tool_arguments | hf_repo | hf_file | template_override |
-      | ipython | {"code": "print('Hello, World!')"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | |
-      | ipython | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
-      | ipython | {"code": "print('Hello, World!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
-      | ipython | {"code": "print('Hello World!')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use |
       | ipython | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q4_K_M.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
+      | ipython | {"code": "print(\"Hello World\")"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | |
+      | ipython | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
+      | ipython | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
+      | ipython | {"code": "print('hello world')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use |
       | ipython | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
       | ipython | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
       | ipython | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf | |
+      | ipython | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 |
       # | ipython | {"code": "print('Hello, world!')"} | bartowski/gemma-2-2b-it-GGUF | gemma-2-2b-it-Q4_K_M.gguf | |
-      # | ipython | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 |

   @slow
@@ -114,7 +115,7 @@ Feature: llama.cpp server
     And the server is healthy
     And a model test
     And 256 max tokens to predict
-    And a user prompt write a hello world in python
+    And a user prompt say hello world with python
     And parallel tool calls is disabled
     And an OAI compatible chat completions request with no api error
     Then no tool is called
@@ -128,7 +129,7 @@ Feature: llama.cpp server
     And the server is healthy
     And a model test
     And 256 max tokens to predict
-    And a user prompt write a hello world in python
+    And a user prompt say hello world with python
     And a tool choice none
     And python tool
     And parallel tool calls is disabled