diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 733b89ccf..22343cea5 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2591,11 +2591,15 @@ int main(int argc, char ** argv) {
         return false;
     };
 
-    auto middleware_server_state = [&state](const httplib::Request &, httplib::Response & res) {
+    auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            res.set_content("The model is loading. Please wait.<br/>The user interface will appear soon.", "text/html; charset=utf-8");
-            res.status = 503;
+            if(req.path == "/"){
+                res.set_content("The model is loading. Please wait.<br/>The user interface will appear soon.", "text/html; charset=utf-8");
+                res.status = 503;
+            } else {
+                res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+            }
             return false;
         }
         return true;
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index b55971454..6a3ffe317 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -105,8 +105,16 @@ Feature: llama.cpp server
     Given first token is removed
     Then tokens can be detokenized
 
+  Scenario: Tokenize with pieces
+    When tokenizing with pieces:
+    """
+    What is the capital of Germany?
+    媽
+    """
+    Then tokens are given with pieces
+
   Scenario: Models available
     Given available models
     Then 1 models are supported
     Then model 0 is identified by tinyllama-2
-    Then model 0 is trained on 128 tokens context
+    Then model 0 is trained on 128 tokens context
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 65b71a8e8..c463decf0 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1208,6 +1208,7 @@ async def wait_for_slots_status(context,
         while True:
             async with await session.get(f'{base_url}/slots', params=params) as slots_response:
                 status_code = slots_response.status
+                print(await slots_response.text())
                 slots = await slots_response.json()
                 if context.debug:
                     print(f"slots responses {slots}\n")
@@ -1372,4 +1373,4 @@ def start_server_background(context):
     thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
     thread_stderr.start()
 
-    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
\ No newline at end of file