mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 20:14:29 +00:00
llama : update offload functions for KQ tensors
This commit is contained in:
parent
2926ef63b1
commit
6669cd8329
@@ -4856,12 +4856,13 @@ struct llm_offload_trie {
|
||||
static const std::unordered_map<const char *, llm_offload_func_e> k_offload_map = {
|
||||
//{ "inp_tokens", OFFLOAD_FUNC_NR }, // TODO: missing K-quants get_rows kernel
|
||||
//{ "inp_embd", OFFLOAD_FUNC_NR }, // TODO: missing K-quants get_rows kernel
|
||||
{ "inp_pos", OFFLOAD_FUNC_NR },
|
||||
{ "pos_embd", OFFLOAD_FUNC_NR },
|
||||
|
||||
{ "KQ_mask", OFFLOAD_FUNC_NR },
|
||||
{ "K_shift", OFFLOAD_FUNC_NR },
|
||||
{ "K_shifted", OFFLOAD_FUNC_NR },
|
||||
{ "inp_pos", OFFLOAD_FUNC_KQ }, // this is often used for KQ ops (e.g. rope)
|
||||
{ "KQ_scale", OFFLOAD_FUNC_KQ },
|
||||
{ "KQ_mask", OFFLOAD_FUNC_KQ },
|
||||
{ "K_shift", OFFLOAD_FUNC_KQ },
|
||||
{ "K_shifted", OFFLOAD_FUNC_KQ },
|
||||
|
||||
{ "inp_norm", OFFLOAD_FUNC_NR },
|
||||
{ "inp_norm_w", OFFLOAD_FUNC_NR },
|
||||
|
Loading…
Reference in New Issue
Block a user