change default temperature of OAI compat API from 0 to 1 (#7226)

* change default temperature of OAI compat API from 0 to 1

* make tests explicitly send temperature to OAI API
2024-12-24 10:24:35 +00:00 · 2024-05-12 19:40:08 -07:00 · 2024-05-12 19:40:08 -07:00 · e586ee4259
commit e586ee4259
parent cbf75894d2
2 changed files with 6 additions and 3 deletions
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -887,6 +887,7 @@ async def oai_chat_completions(user_prompt,
                               base_path,
                               async_client,
                               debug=False,
+                               temperature=None,
                               model=None,
                               n_predict=None,
                               enable_streaming=None,
@@ -913,7 +914,8 @@ async def oai_chat_completions(user_prompt,
        "model": model,
        "max_tokens": n_predict,
        "stream": enable_streaming,
-        "seed": seed
+        "temperature": temperature if temperature is not None else 0.0,
+        "seed": seed,
    }
    if response_format is not None:
        payload['response_format'] = response_format
@@ -978,7 +980,8 @@ async def oai_chat_completions(user_prompt,
                max_tokens=n_predict,
                stream=enable_streaming,
                response_format=payload.get('response_format'),
-                seed=seed
+                seed=seed,
+                temperature=payload['temperature']
            )
        except openai.error.AuthenticationError as e:
            if expect_api_error is not None and expect_api_error:
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -371,7 +371,7 @@ static json oaicompat_completion_params_parse(
    llama_params["presence_penalty"]  = json_value(body,   "presence_penalty",  0.0);
    llama_params["seed"]              = json_value(body,   "seed",              LLAMA_DEFAULT_SEED);
    llama_params["stream"]            = json_value(body,   "stream",            false);
-    llama_params["temperature"]       = json_value(body,   "temperature",       0.0);
+    llama_params["temperature"]       = json_value(body,   "temperature",       1.0);
    llama_params["top_p"]             = json_value(body,   "top_p",             1.0);
    // Apply chat template to the list of messages