From b8d1b1a5e18875207212005778c1343e42ed4606 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 8 Dec 2024 22:12:11 +0200
Subject: [PATCH] server : fix infill prompt format

ggml-ci
---
 examples/server/server.cpp | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 1d9c0533d..81221a1f1 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3380,7 +3380,20 @@ int main(int argc, char ** argv) {
             task.id = ctx_server.queue_tasks.get_new_id();
             task.index = i;
 
-            task.prompt_tokens = std::move(tokenized_prompts[i]);
+            if (type == SERVER_TASK_TYPE_INFILL) {
+                task.prompt_tokens = format_infill(
+                    ctx_server.ctx,
+                    data.at("input_prefix"),
+                    data.at("input_suffix"),
+                    data.at("input_extra"),
+                    ctx_server.params_base.n_batch,
+                    ctx_server.params_base.n_predict,
+                    ctx_server.slots[0].n_ctx, // TODO: there should be a better way
+                    ctx_server.params_base.spm_infill,
+                    tokenized_prompts[i]);
+            } else {
+                task.prompt_tokens = std::move(tokenized_prompts[i]);
+            }
             task.params = server_task::params_from_json_cmpl(ctx_server.model, ctx_server.params_base, data);
             task.id_selected_slot = json_value(data, "id_slot", -1);
 