mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
llama : minor style
This commit is contained in:
parent
8f1d81a0b6
commit
c6d4cb4655
@ -8711,8 +8711,7 @@ static void llm_build_kv_store(
|
|||||||
|
|
||||||
GGML_ASSERT(kv.size == n_ctx);
|
GGML_ASSERT(kv.size == n_ctx);
|
||||||
|
|
||||||
struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_k_gqa,
|
struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_k_gqa, ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa)*kv_head);
|
||||||
(ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa))*kv_head);
|
|
||||||
cb(k_cache_view, "k_cache_view", il);
|
cb(k_cache_view, "k_cache_view", il);
|
||||||
|
|
||||||
// note: storing RoPE-ed version of K in the KV cache
|
// note: storing RoPE-ed version of K in the KV cache
|
||||||
@ -8723,8 +8722,7 @@ static void llm_build_kv_store(
|
|||||||
struct ggml_tensor * v_cache_view = nullptr;
|
struct ggml_tensor * v_cache_view = nullptr;
|
||||||
|
|
||||||
if (cparams.flash_attn) {
|
if (cparams.flash_attn) {
|
||||||
v_cache_view = ggml_view_1d(ctx, kv.v_l[il], n_tokens*n_embd_v_gqa,
|
v_cache_view = ggml_view_1d(ctx, kv.v_l[il], n_tokens*n_embd_v_gqa, ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa)*kv_head);
|
||||||
(kv_head)*ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa));
|
|
||||||
} else {
|
} else {
|
||||||
// note: the V cache is transposed when not using flash attention
|
// note: the V cache is transposed when not using flash attention
|
||||||
v_cache_view = ggml_view_2d(ctx, kv.v_l[il], n_tokens, n_embd_v_gqa,
|
v_cache_view = ggml_view_2d(ctx, kv.v_l[il], n_tokens, n_embd_v_gqa,
|
||||||
@ -9211,8 +9209,7 @@ static struct ggml_tensor * llm_build_kv(
|
|||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
|
|
||||||
cur = llm_build_kqv(ctx, lctx, kv, graph, wo, wo_b,
|
cur = llm_build_kqv(ctx, lctx, kv, graph, wo, wo_b, q_cur, kq_mask, n_tokens, n_kv, kq_scale, cb, il);
|
||||||
q_cur, kq_mask, n_tokens, n_kv, kq_scale, cb, il);
|
|
||||||
cb(cur, "kqv_out", il);
|
cb(cur, "kqv_out", il);
|
||||||
|
|
||||||
return cur;
|
return cur;
|
||||||
|
Loading…
Reference in New Issue
Block a user