Merge branch 'server-rev' of https://github.com//ggerganov/llama.cpp into server-rev

2025-01-13 20:14:29 +00:00 · 2023-10-22 19:49:48 +03:00 · 2023-10-22 19:49:48 +03:00 · a4d69d8b81
commit a4d69d8b81
parent 2679c432d5 a8063171bd
3 changed files with 1680 additions and 1673 deletions
--- a/examples/server/index.html.hpp
+++ b/examples/server/index.html.hpp
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@ -413,7 +413,7 @@
          currentMessages.push(data);
          slot_id = data.slot_id;
          if (selected_image && !data.multimodal) {
-            alert("The server was no compiled for multimodal or the model projector can't be loaded.");
+            alert("The server was not compiled for multimodal or the model projector can't be loaded.");
            return;
          }
          transcriptUpdate([...history, [char, currentMessages]])
@ -470,6 +470,7 @@
      transcriptUpdate([...session.value.transcript, ["", prompt]]);
      await runLlama(prompt, {
        ...params.value,
+        slot_id: slot_id,
        stop: [],
      }, "");
    }
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -655,6 +655,7 @@ struct llama_server_context
    bool launch_slot_with_data(llama_client_slot* &slot, json data) {
        slot_params default_params;
        llama_sampling_params default_sparams;
+
        slot->params.stream           = json_value(data, "stream",            false);
        slot->params.cache_prompt     = json_value(data, "cache_prompt",      false);
        slot->params.n_predict        = json_value(data, "n_predict",         default_params.n_predict);
@ -1515,7 +1516,9 @@ struct llama_server_context
                        prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
                        prefix_tokens.push_back(llama_token_middle(ctx));
                        prompt_tokens = prefix_tokens;
-                    } else {
+                    }
+                    else
+                    {
                        prompt_tokens = tokenize(slot.prompt, system_prompt.empty());  // add BOS if there isn't system prompt
                    }