server : fix non-transformer logic + remove response from /props
parent 9ec6b49176
commit 0db72b63f5
@@ -2082,10 +2082,13 @@ struct server_context {
 
                     // keep only the common part
                     int p0 = slot.n_past;
 
                     if (!llama_kv_cache_seq_rm(ctx, slot.id + 1, p0, -1)) {
                         // could not partially delete (likely using a non-Transformer model)
                         llama_kv_cache_seq_rm(ctx, slot.id + 1, -1, -1);
+
+                        p0 = 0;
+
                         // there is no common part left
                         slot.n_past    = 0;
                         slot.n_past_se = 0;
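For context: llama_kv_cache_seq_rm(ctx, seq_id, p0, p1) returns false when the backend cannot erase a token sub-range (recurrent, non-Transformer models can only drop a sequence wholesale), which is exactly what the new branch handles: wipe the sequence entirely and re-evaluate the prompt from position 0. Below is a minimal standalone sketch of that fallback, not the server code itself; server_slot here is a stand-in for the server's internal slot type, and keep_common_prefix is a hypothetical helper name.

#include "llama.h"

// Stand-in for the server's internal slot state (illustration only).
struct server_slot {
    int id;
    int n_past;
    int n_past_se; // self-extend position, mirrors the diff above
};

// Trim the KV cache so only the common prefix [0, slot.n_past) survives.
// Returns the position from which the prompt must be (re-)evaluated.
static int keep_common_prefix(llama_context * ctx, server_slot & slot) {
    int p0 = slot.n_past;

    if (!llama_kv_cache_seq_rm(ctx, slot.id + 1, p0, -1)) {
        // partial removal failed (likely a non-Transformer model):
        // drop the whole sequence and start the prompt over
        llama_kv_cache_seq_rm(ctx, slot.id + 1, -1, -1);

        p0 = 0; // there is no common part left
        slot.n_past    = 0;
        slot.n_past_se = 0;
    }

    return p0;
}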
@@ -2773,7 +2776,6 @@ int main(int argc, char ** argv) {
 
     const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
         json data = {
-            { "system_prompt",               "[unavailable]" },
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params.n_parallel },
             { "chat_template",               llama_get_chat_template(ctx_server.model) },
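After this change the /props response no longer carries a "system_prompt" field, so clients must not rely on it. A hedged usage sketch with cpp-httplib (the same HTTP library the server is built on), assuming a llama.cpp server listening locally on port 8080 — both assumptions for illustration:

#include <iostream>
#include "httplib.h"

int main() {
    // Assumed host/port of a locally running llama.cpp server.
    httplib::Client cli("localhost", 8080);

    if (auto res = cli.Get("/props")) {
        // Expected keys after this commit: default_generation_settings,
        // total_slots, chat_template -- "system_prompt" is gone.
        std::cout << res->body << std::endl;
    } else {
        std::cerr << "request failed" << std::endl;
    }

    return 0;
}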