mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 03:44:35 +00:00
add TBD
This commit is contained in:
parent
8bc76a225d
commit
101c578715
18
llama.cpp
18
llama.cpp
@ -3570,24 +3570,16 @@ static struct ggml_cgraph * llm_build_starcoder(
|
|||||||
offload_func(attn_norm->src[0]);
|
offload_func(attn_norm->src[0]);
|
||||||
offload_func(attn_norm);
|
offload_func(attn_norm);
|
||||||
|
|
||||||
if (model.layers[il].attn_norm_2) { // Falcon-40B
|
cur = attn_norm;
|
||||||
cur = ggml_norm(ctx0, inpL, norm_eps);
|
|
||||||
offload_func(cur);
|
|
||||||
|
|
||||||
cur = ggml_add(ctx0,
|
|
||||||
ggml_mul(ctx0, cur, model.layers[il].attn_norm_2),
|
|
||||||
model.layers[il].attn_norm_2_b);
|
|
||||||
offload_func(cur->src[0]);
|
|
||||||
offload_func(cur);
|
|
||||||
} else { // Falcon 7B
|
|
||||||
cur = attn_norm;
|
|
||||||
}
|
|
||||||
|
|
||||||
// compute QKV
|
// compute QKV
|
||||||
|
|
||||||
cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur);
|
cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur);
|
||||||
offload_func_kq(cur);
|
offload_func_kq(cur);
|
||||||
|
|
||||||
|
// ===== TBD (QKV Split + FF) ====
|
||||||
|
#define PRINT_SHAPE(x) fprintf(stderr, "%d %s: (%s)\n", __LINE__, #x, llama_format_tensor_shape(x).c_str())
|
||||||
|
GGML_ASSERT(false);
|
||||||
|
|
||||||
// Note that the strides for Kcur, Vcur are set up so that the
|
// Note that the strides for Kcur, Vcur are set up so that the
|
||||||
// resulting views are misaligned with the tensor's storage
|
// resulting views are misaligned with the tensor's storage
|
||||||
// (by applying the K/V offset we shift the tensor's original
|
// (by applying the K/V offset we shift the tensor's original
|
||||||
|
Loading…
Reference in New Issue
Block a user