account for both api and web browser requests
parent daf64fc4a9
commit cb13382136
@@ -2591,11 +2591,15 @@ int main(int argc, char ** argv) {
         return false;
     };
 
-    auto middleware_server_state = [&state](const httplib::Request &, httplib::Response & res) {
+    auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");
-            res.status = 503;
+            if (req.path == "/") {
+                res.set_content("<html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>", "text/html; charset=utf-8");
+                res.status = 503;
+            } else {
+                res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+            }
             return false;
         }
         return true;
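
The hunk above makes the loading-state middleware distinguish a web browser opening the UI (a request to "/") from an API client: the browser gets a human-readable HTML placeholder, while API callers receive a structured 503 error they can parse. Below is a minimal, self-contained sketch of that pattern using cpp-httplib's set_pre_routing_handler; the standalone state flag, the port, and the hard-coded JSON error body are illustrative stand-ins for the server's own state machine and its res_error/format_error_response helpers, not the actual llama.cpp code.

// Sketch only: HTML "loading" page for browser requests to "/", JSON error for API clients.
#include "httplib.h"   // cpp-httplib single-header library (what server.cpp is built on)
#include <atomic>

enum server_state { SERVER_STATE_LOADING_MODEL, SERVER_STATE_READY };

int main() {
    std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
    httplib::Server svr;

    // Runs before routing; short-circuits every request while the model is loading.
    svr.set_pre_routing_handler([&state](const httplib::Request & req, httplib::Response & res) {
        if (state.load() == SERVER_STATE_LOADING_MODEL) {
            if (req.path == "/") {
                // Web browser request: show a human-readable placeholder page.
                res.set_content("<html><body>The model is loading. Please wait.</body></html>",
                                "text/html; charset=utf-8");
            } else {
                // API request: return a machine-readable error body instead of HTML
                // (stand-in for the server's format_error_response helper).
                res.set_content("{\"error\":{\"message\":\"Loading model\",\"type\":\"unavailable_error\"}}",
                                "application/json; charset=utf-8");
            }
            res.status = 503;
            return httplib::Server::HandlerResponse::Handled;   // stop here, skip normal routes
        }
        return httplib::Server::HandlerResponse::Unhandled;      // model ready: continue routing
    });

    svr.Get("/", [](const httplib::Request &, httplib::Response & res) {
        res.set_content("<html><body>ready</body></html>", "text/html; charset=utf-8");
    });

    svr.listen("127.0.0.1", 8080);
}
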
@@ -105,8 +105,16 @@ Feature: llama.cpp server
     Given first token is removed
     Then tokens can be detokenized
 
+  Scenario: Tokenize with pieces
+    When tokenizing with pieces:
+      """
+      What is the capital of Germany?
+      媽
+      """
+    Then tokens are given with pieces
+
   Scenario: Models available
     Given available models
     Then 1 models are supported
     Then model 0 is identified by tinyllama-2
     Then model 0 is trained on 128 tokens context
@@ -1208,6 +1208,7 @@ async def wait_for_slots_status(context,
     while True:
         async with await session.get(f'{base_url}/slots', params=params) as slots_response:
             status_code = slots_response.status
+            print(await slots_response.text())
             slots = await slots_response.json()
             if context.debug:
                 print(f"slots responses {slots}\n")
@@ -1372,4 +1373,4 @@ def start_server_background(context):
     thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
     thread_stderr.start()
 
     print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")