diff --git a/examples/server/server.cpp b/examples/server/server.cpp index b3773d256..39cc3c6f8 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1778,6 +1778,9 @@ struct server_context { } slot->cache_tokens.resize(token_count); + // TODO: maybe detokenize the slot->cache_tokens instead? + slot->prompt = string_format("[restored %d tokens from file]", (int) token_count); + const int64_t t_end = ggml_time_us(); const double t_restore_ms = (t_end - t_start) / 1000.0;