diff --git a/llama.cpp b/llama.cpp index c028c9023..9c1b2a93e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4419,7 +4419,7 @@ struct llama_context * llama_new_context_with_model( if (ggml_mpi_rank(ctx->ctx_mpi) > 0) { // Enter a blocking eval loop with dummy input, letting rank=0 drive the process - const std::vector<llama_token> tmp(ctx->model.hparams.n_ctx, llama_token_bos()); + const std::vector<llama_token> tmp(ctx->model.hparams.n_ctx, llama_token_bos(ctx)); while (!llama_eval(ctx, tmp.data(), tmp.size(), 0, 0)) {}; llama_backend_free(); exit(1);