mirror of https://github.com/ggerganov/llama.cpp.git
speculative : fix prompt tokenization in speculative example (#4025)
* Support special tokens and not adding BOS to prompt in speculative
* Adapt to new should_add_bos function
* Ensure tgt and dft have same add_bos setting
parent dae06c06e5
commit 40a34fe8d0
examples/speculative/speculative.cpp

@@ -94,9 +94,22 @@ int main(int argc, char ** argv) {
         }
     }
 
-    // tokenize the prompt
+    // Tokenize the prompt
+    const bool add_bos_tgt = llama_should_add_bos_token(model_tgt);
+    LOG("add_bos tgt: %d\n", add_bos_tgt);
+
+    const bool add_bos_dft = llama_should_add_bos_token(model_dft);
+    LOG("add_bos dft: %d\n", add_bos_dft);
+
+    if (add_bos_tgt != add_bos_dft) {
+        fprintf(stderr, "%s: error: draft model add_bos must match target model to use speculation but ", __func__);
+        fprintf(stderr, "add_bos_dft = %d while add_bos_tgt = %d\n", add_bos_dft, add_bos_tgt);
+        return 1;
+    }
+
     std::vector<llama_token> inp;
-    inp = ::llama_tokenize(ctx_tgt, params.prompt, true);
+    inp = ::llama_tokenize(ctx_tgt, params.prompt, add_bos_tgt, true);
 
     const int max_context_size     = llama_n_ctx(ctx_tgt);
     const int max_tokens_list_size = max_context_size - 4;
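In isolation, the change amounts to the following pattern: a minimal sketch, assuming the llama.cpp common helpers `llama_should_add_bos_token` and the four-argument `::llama_tokenize` overload as they exist at this commit. The standalone `tokenize_prompt` wrapper is hypothetical, for illustration only, and is not part of the commit.

// Sketch of the tokenization fix, not the exact example code: tokenize the
// prompt once, honoring the target model's BOS setting, and refuse to run
// if the draft model disagrees about BOS.
#include <cstdio>
#include <string>
#include <vector>

#include "common.h"   // llama_should_add_bos_token, ::llama_tokenize
#include "llama.h"

// Hypothetical wrapper; returns an empty vector on a BOS mismatch.
static std::vector<llama_token> tokenize_prompt(
        llama_context     * ctx_tgt,
        const llama_model * model_tgt,
        const llama_model * model_dft,
        const std::string & prompt) {
    // Each model reports whether its tokenizer expects a leading BOS token.
    const bool add_bos_tgt = llama_should_add_bos_token(model_tgt);
    const bool add_bos_dft = llama_should_add_bos_token(model_dft);

    // Speculation compares draft and target tokens position-by-position,
    // so both models must see the same prompt tokens, BOS included.
    if (add_bos_tgt != add_bos_dft) {
        fprintf(stderr, "add_bos mismatch: tgt = %d, dft = %d\n",
                add_bos_tgt, add_bos_dft);
        return {};
    }

    // The final 'true' enables parsing of special tokens in the prompt.
    return ::llama_tokenize(ctx_tgt, prompt, add_bos_tgt, true);
}

Because the check guarantees both tokenizers agree on BOS, the example can keep tokenizing the prompt only once, with ctx_tgt, and feed the same tokens to both models.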