llama : fix command-r inference

2024-12-27 20:04:35 +00:00 · 2024-03-28 06:22:24 -04:00 · 2024-03-28 06:22:24 -04:00 · 64b7d85891
commit 64b7d85891
parent cfc4d75df6
1 changed file with 3 additions and 2 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -9152,8 +9152,9 @@ struct llm_build_context {
            if (il == n_layer - 1) {
                // skip computing output for unused tokens
                struct ggml_tensor * inp_out_ids = build_inp_out_ids();
-                cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
-                inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+                cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
+                inpL    = ggml_get_rows(ctx0,    inpL, inp_out_ids);
+                ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
            }

            struct ggml_tensor * attn_out = cur;