mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-24 10:24:35 +00:00)
server : fix infill prompt format
ggml-ci
parent e52522b869
commit b8d1b1a5e1
@@ -3380,7 +3380,20 @@ int main(int argc, char ** argv) {
                 task.id    = ctx_server.queue_tasks.get_new_id();
                 task.index = i;

-                task.prompt_tokens = std::move(tokenized_prompts[i]);
+                if (type == SERVER_TASK_TYPE_INFILL) {
+                    task.prompt_tokens = format_infill(
+                        ctx_server.ctx,
+                        data.at("input_prefix"),
+                        data.at("input_suffix"),
+                        data.at("input_extra"),
+                        ctx_server.params_base.n_batch,
+                        ctx_server.params_base.n_predict,
+                        ctx_server.slots[0].n_ctx, // TODO: there should be a better way
+                        ctx_server.params_base.spm_infill,
+                        tokenized_prompts[i]);
+                } else {
+                    task.prompt_tokens = std::move(tokenized_prompts[i]);
+                }

                 task.params           = server_task::params_from_json_cmpl(ctx_server.model, ctx_server.params_base, data);
                 task.id_selected_slot = json_value(data, "id_slot", -1);
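For context on the spm_infill flag threaded through the call above: FIM-capable models accept the prefix and suffix in either Prefix-Suffix-Middle order (the default) or Suffix-Prefix-Middle order. Below is a minimal sketch of that assembly; build_fim_prompt and its signature are hypothetical illustrations, not the server's actual format_infill helper, and the token-id typedef is an assumption.

// Hypothetical sketch, not the real format_infill: wraps the tokenized
// prefix and suffix in the model's FIM sentinel tokens, with spm_infill
// selecting Suffix-Prefix-Middle instead of Prefix-Suffix-Middle order.
#include <cstdint>
#include <vector>

using llama_token  = int32_t;                  // assumption: token ids are int32
using llama_tokens = std::vector<llama_token>;

llama_tokens build_fim_prompt(
        const llama_tokens & prefix,  // tokenized "input_prefix"
        const llama_tokens & suffix,  // tokenized "input_suffix"
        llama_token fim_pre,          // model's FIM prefix sentinel token
        llama_token fim_suf,          // model's FIM suffix sentinel token
        llama_token fim_mid,          // model's FIM middle sentinel token
        bool spm_infill) {
    llama_tokens pre;
    pre.push_back(fim_pre);
    pre.insert(pre.end(), prefix.begin(), prefix.end());

    llama_tokens suf;
    suf.push_back(fim_suf);
    suf.insert(suf.end(), suffix.begin(), suffix.end());

    // PSM (default): <FIM_PRE> prefix <FIM_SUF> suffix <FIM_MID>
    // SPM          : <FIM_SUF> suffix <FIM_PRE> prefix <FIM_MID>
    llama_tokens out          = spm_infill ? suf : pre;
    const llama_tokens & tail = spm_infill ? pre : suf;
    out.insert(out.end(), tail.begin(), tail.end());
    out.push_back(fim_mid);
    return out;
}

The real helper also receives n_batch, n_predict, and the slot's n_ctx, presumably so the combined prefix, suffix, and "input_extra" context can be trimmed to fit the available context budget, which is why those parameters appear at the call site in the diff.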