mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
parent
fadde67135
commit
f8d6a23804
@ -5312,7 +5312,7 @@ void ggml_mul_mat_set_prec(
|
||||
as -> [cols, rows, n_expert]
|
||||
ids -> [n_expert_used, n_tokens] (i32)
|
||||
b -> [cols, n_expert_used, n_tokens]
|
||||
- c -> [cols, n_expert_used, n_tokens]
|
||||
+ c -> [rows, n_expert_used, n_tokens]
|
||||
|
||||
in b, the n_expert_used dimension can be broadcast to match that of ids
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user