llama : update offload functions for KQ tensors

This commit is contained in:
Georgi Gerganov 2023-10-31 08:24:07 +02:00
parent 2926ef63b1
commit 6669cd8329
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -4856,12 +4856,13 @@ struct llm_offload_trie {
 static const std::unordered_map<const char *, llm_offload_func_e> k_offload_map = {
 //{ "inp_tokens", OFFLOAD_FUNC_NR }, // TODO: missing K-quants get_rows kernel
 //{ "inp_embd", OFFLOAD_FUNC_NR }, // TODO: missing K-quants get_rows kernel
-{ "inp_pos", OFFLOAD_FUNC_NR },
 { "pos_embd", OFFLOAD_FUNC_NR },
-{ "KQ_mask", OFFLOAD_FUNC_NR },
-{ "K_shift", OFFLOAD_FUNC_NR },
-{ "K_shifted", OFFLOAD_FUNC_NR },
+{ "inp_pos", OFFLOAD_FUNC_KQ }, // this is often used for KQ ops (e.g. rope)
+{ "KQ_scale", OFFLOAD_FUNC_KQ },
+{ "KQ_mask", OFFLOAD_FUNC_KQ },
+{ "K_shift", OFFLOAD_FUNC_KQ },
+{ "K_shifted", OFFLOAD_FUNC_KQ },
 { "inp_norm", OFFLOAD_FUNC_NR },
 { "inp_norm_w", OFFLOAD_FUNC_NR },