mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 20:14:29 +00:00
Merge branch 'server-rev' of https://github.com//ggerganov/llama.cpp into server-rev
This commit is contained in:
commit
a4d69d8b81
File diff suppressed because it is too large
Load Diff
@ -413,7 +413,7 @@
|
||||
currentMessages.push(data);
|
||||
slot_id = data.slot_id;
|
||||
if (selected_image && !data.multimodal) {
|
||||
alert("The server was no compiled for multimodal or the model projector can't be loaded.");
|
||||
alert("The server was not compiled for multimodal or the model projector can't be loaded.");
|
||||
return;
|
||||
}
|
||||
transcriptUpdate([...history, [char, currentMessages]])
|
||||
@ -470,6 +470,7 @@
|
||||
transcriptUpdate([...session.value.transcript, ["", prompt]]);
|
||||
await runLlama(prompt, {
|
||||
...params.value,
|
||||
slot_id: slot_id,
|
||||
stop: [],
|
||||
}, "");
|
||||
}
|
||||
|
@ -655,6 +655,7 @@ struct llama_server_context
|
||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||
slot_params default_params;
|
||||
llama_sampling_params default_sparams;
|
||||
|
||||
slot->params.stream = json_value(data, "stream", false);
|
||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
||||
@ -1515,7 +1516,9 @@ struct llama_server_context
|
||||
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
|
||||
prefix_tokens.push_back(llama_token_middle(ctx));
|
||||
prompt_tokens = prefix_tokens;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user