llama : fix command-r inference when omitting outputs (#6367)

This commit is contained in:
compilade 2024-03-28 08:05:54 -04:00 committed by GitHub
parent 28cb9a09c4
commit 0308f5e3d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -9152,8 +9152,9 @@ struct llm_build_context {
     if (il == n_layer - 1) {
         // skip computing output for unused tokens
         struct ggml_tensor * inp_out_ids = build_inp_out_ids();
         cur = ggml_get_rows(ctx0, cur, inp_out_ids);
         inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+        ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
     }
     struct ggml_tensor * attn_out = cur;