From 90ab8a10d58ff2f67288dc5c61a5cfc127a23535 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 20 Oct 2024 20:15:59 +0300
Subject: [PATCH] speculative : limit batch size to llama_n_batch

---
 examples/speculative/speculative.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index ff137682d..8a6475415 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -195,8 +195,8 @@ int main(int argc, char ** argv) {
         drafts[s].smpl = common_sampler_init(model_dft, params.sparams);
     }
 
-    llama_batch batch_dft = llama_batch_init(llama_n_ctx(ctx_dft), 0, 1);
-    llama_batch batch_tgt = llama_batch_init(llama_n_ctx(ctx_tgt), 0, n_seq_dft);
+    llama_batch batch_dft = llama_batch_init(llama_n_batch(ctx_dft), 0, 1);
+    llama_batch batch_tgt = llama_batch_init(llama_n_batch(ctx_tgt), 0, n_seq_dft);
 
     const auto t_dec_start = ggml_time_us();
 