Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-11-11 21:39:52 +00:00)
llama : fix command-r inference when omitting outputs (#6367)
commit 0308f5e3d7 (parent 28cb9a09c4)
@@ -9152,8 +9152,9 @@ struct llm_build_context {
                 if (il == n_layer - 1) {
                     // skip computing output for unused tokens
                     struct ggml_tensor * inp_out_ids = build_inp_out_ids();
-                    cur  = ggml_get_rows(ctx0, cur, inp_out_ids);
-                    inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+                    cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
+                    inpL    = ggml_get_rows(ctx0,    inpL, inp_out_ids);
+                    ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
                 }

                 struct ggml_tensor * attn_out = cur;
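
Why the extra line matters: in the Command-R graph the attention and feed-forward branches run in parallel from the same layer input, and both results are added to the residual after this block (note the struct ggml_tensor * attn_out = cur; that saves the attention output). On the last layer, rows for tokens whose outputs are unused are dropped from cur and inpL via ggml_get_rows; if ffn_inp is not filtered the same way, the FFN branch would keep the full n_tokens rows and the later combination of the two branches would operate on mismatched shapes. Below is a standalone sketch (a hypothetical helper, not code from this repository) of the row-gathering semantics that inp_out_ids relies on: picking a subset of token rows by index.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Gather selected rows from a row-major [n_rows x n_cols] matrix,
// mimicking what ggml_get_rows does for a 2D tensor: dst[i] = src[ids[i]].
// This is an illustrative sketch, not the ggml implementation.
static void get_rows(const float *src, int n_cols,
                     const int32_t *ids, int n_ids, float *dst) {
    for (int i = 0; i < n_ids; i++) {
        memcpy(dst + (size_t) i * n_cols,
               src + (size_t) ids[i] * n_cols,
               sizeof(float) * n_cols);
    }
}

int main(void) {
    // 4 "tokens", embedding width 3
    const float hidden[4 * 3] = {
         0,  1,  2,
        10, 11, 12,
        20, 21, 22,
        30, 31, 32,
    };
    // keep only the last token, as when only the final logits are needed
    const int32_t inp_out_ids[1] = { 3 };

    float out[1 * 3];
    get_rows(hidden, 3, inp_out_ids, 1, out);

    printf("%g %g %g\n", out[0], out[1], out[2]); // prints: 30 31 32
    return 0;
}

Applying the same inp_out_ids index to cur, inpL, and ffn_inp keeps all three tensors row-aligned for the parallel attention + FFN sum, which is what the one-line fix restores.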