simple : fix batch handling (#3803)

repo:   https://github.com/ggerganov/llama.cpp.git (mirror)
commit: c8d6a1f34a
parent: 2f9ec7e271
examples/simple/simple.cpp

@@ -95,13 +95,8 @@ int main(int argc, char ** argv) {
     llama_batch batch = llama_batch_init(512, 0, 1);
 
     // evaluate the initial prompt
-    batch.n_tokens = tokens_list.size();
-
-    for (int32_t i = 0; i < batch.n_tokens; i++) {
-        batch.token[i]  = tokens_list[i];
-        batch.pos[i]    = i;
-        batch.seq_id[i] = 0;
-        batch.logits[i] = false;
+    for (size_t i = 0; i < tokens_list.size(); i++) {
+        llama_batch_add(batch, tokens_list[i], i, { 0 }, false);
     }
 
     // llama_decode will output logits only for the last token of the prompt
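For reference, llama_batch_add and llama_batch_clear are helpers from the examples' common code (common/common.h), not part of the core libllama API. Below is a sketch of roughly what they do, written against the llama_batch layout of this period, in which n_seq_id and seq_id are per-token arrays; the exact signatures in the tree at this commit may differ slightly. It also hints at why the hand-rolled loop above needed fixing: it never set n_seq_id and assigned batch.seq_id[i] = 0 directly, which does not match a layout where seq_id[i] is itself an array.

#include <vector>
#include "llama.h"

// reset the batch so it can be refilled for the next decode call
void llama_batch_clear(struct llama_batch & batch) {
    batch.n_tokens = 0;
}

// append one token to the batch: its id, its position in the context,
// the sequence ids it belongs to, and whether logits are wanted for it
void llama_batch_add(
        struct llama_batch & batch,
        llama_token id,
        llama_pos pos,
        const std::vector<llama_seq_id> & seq_ids,
        bool logits) {
    batch.token   [batch.n_tokens] = id;
    batch.pos     [batch.n_tokens] = pos;
    batch.n_seq_id[batch.n_tokens] = seq_ids.size();
    for (size_t i = 0; i < seq_ids.size(); ++i) {
        batch.seq_id[batch.n_tokens][i] = seq_ids[i];
    }
    batch.logits  [batch.n_tokens] = logits;

    batch.n_tokens++;
}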
@@ -148,15 +143,10 @@ int main(int argc, char ** argv) {
         fflush(stdout);
 
         // prepare the next batch
-        batch.n_tokens = 0;
+        llama_batch_clear(batch);
 
         // push this new token for next evaluation
-        batch.token [batch.n_tokens] = new_token_id;
-        batch.pos   [batch.n_tokens] = n_cur;
-        batch.seq_id[batch.n_tokens] = 0;
-        batch.logits[batch.n_tokens] = true;
-
-        batch.n_tokens += 1;
+        llama_batch_add(batch, new_token_id, n_cur, { 0 }, true);
 
         n_decode += 1;
     }
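Taken together with the first hunk, the example now builds batches only through these helpers. A minimal sketch of the resulting decode-loop pattern, assuming ctx, batch, n_cur, n_decode, and a freshly sampled new_token_id as in the surrounding example (error handling abbreviated):

// prepare the next batch: drop the previous token and queue the new one
llama_batch_clear(batch);

// push the sampled token at position n_cur in sequence 0,
// requesting logits so the next token can be sampled from its output
llama_batch_add(batch, new_token_id, n_cur, { 0 }, true);

n_decode += 1;
n_cur    += 1;

// evaluate the single queued token with the model
if (llama_decode(ctx, batch) != 0) {
    fprintf(stderr, "%s : failed to eval\n", __func__);
    return 1;
}

The { 0 } argument constructs a one-element vector of sequence ids, so every token belongs to sequence 0 in this single-stream example.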