From 0db72b63f54e8f5f80fc4dff36d0bdf4aff8bde3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 12 Oct 2024 09:21:41 +0300 Subject: [PATCH] server : fix non-transformer logic + remove response from /props --- examples/server/server.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 02af8a1e4..42b57d9c4 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2082,10 +2082,13 @@ struct server_context { // keep only the common part int p0 = slot.n_past; + if (!llama_kv_cache_seq_rm(ctx, slot.id + 1, p0, -1)) { // could not partially delete (likely using a non-Transformer model) llama_kv_cache_seq_rm(ctx, slot.id + 1, -1, -1); + p0 = 0; + // there is no common part left slot.n_past = 0; slot.n_past_se = 0; @@ -2773,7 +2776,6 @@ int main(int argc, char ** argv) { const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) { json data = { - { "system_prompt", "[unavailable]" }, { "default_generation_settings", ctx_server.default_generation_settings_for_props }, { "total_slots", ctx_server.params.n_parallel }, { "chat_template", llama_get_chat_template(ctx_server.model) },