commit 186c496fdf
Author: M. Yusuf Sarıgöz
Date:   2023-08-12 07:25:10 +03:00

2 changed files with 12 additions and 12 deletions


@@ -26,8 +26,8 @@ def get_tensor_map( n_blocks : int):
     tensor_map["output"] = mapped_to # llama-pth
     # Attention and fee-forward layer blocks
     for i in range(0,n_blocks):
-        # Attention norm 1
-        mapped_to = "transformer.blocks."+str(i)+".attn_norm_1"
+        # Attention norm
+        mapped_to = "transformer.blocks."+str(i)+".attn_norm"
         tensor_map["gpt_neox.layers."+str(i)+".input_layernorm"] = mapped_to # gptneox
         tensor_map["transformer.h."+str(i)+".ln_1"] = mapped_to # gpt2
         tensor_map["transformer.blocks."+str(i)+".norm_1"] = mapped_to # mpt


@@ -565,8 +565,8 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
        std::string blocknamestart = "transformer.blocks." + std::to_string(i) + ".";
-       layer.ln_1_g = get_tensor_ex(ctx, blocknamestart + "attn_norm_1.weight" );
-       layer.ln_1_b = get_tensor_ex(ctx, blocknamestart + "attn_norm_1.bias" );
+       layer.ln_1_g = get_tensor_ex(ctx, blocknamestart + "attn_norm.weight" );
+       layer.ln_1_b = get_tensor_ex(ctx, blocknamestart + "attn_norm.bias" );
        layer.c_attn_attn_w = get_tensor_ex(ctx, blocknamestart + "attn_qkv.weight" );
        layer.c_attn_attn_b = get_tensor_ex(ctx ,blocknamestart + "attn_qkv.bias" );
@@ -584,8 +584,8 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
        layer.c_mlp_proj_b = get_tensor_ex(ctx, blocknamestart + "ffn_down.bias" );
        // map by name
-       model.tensors[blocknamestart + "attn_norm_1.weight"] = layer.ln_1_g;
-       model.tensors[blocknamestart + "attn_norm_1.bias"] = layer.ln_1_b;
+       model.tensors[blocknamestart + "attn_norm.weight"] = layer.ln_1_g;
+       model.tensors[blocknamestart + "attn_norm.bias"] = layer.ln_1_b;
        model.tensors[blocknamestart + "attn_qkv.weight"] = layer.c_attn_attn_w;
        model.tensors[blocknamestart + "attn_qkv.bias"] = layer.c_attn_attn_b;
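
The two hunks have to agree on the canonical name: the Python tensor map decides what each layer's norm tensors are called in the converted file, and the C++ loader fetches them back by that exact string, so renaming "attn_norm_1" to "attn_norm" only works when both sides change together. For context, a minimal sketch of how a converter might apply the map built by get_tensor_map; the checkpoint dict and writer callback are hypothetical, only the function name and the key layout come from the diff above:

from typing import Callable, Dict

def convert_tensors(checkpoint: Dict[str, object],
                    tensor_map: Dict[str, str],
                    write_tensor: Callable[[str, object], None]) -> None:
    # Checkpoint keys look like "gpt_neox.layers.0.input_layernorm.weight";
    # the map is keyed without the ".weight"/".bias" suffix, so split it off first.
    for name, data in checkpoint.items():
        base, _, suffix = name.rpartition(".")
        mapped_to = tensor_map.get(base)
        if mapped_to is None:
            continue  # no canonical name for this tensor, skip it
        # e.g. "transformer.blocks.0.attn_norm" + "." + "weight"
        write_tensor(mapped_to + "." + suffix, data)

# Hypothetical usage: convert_tensors(checkpoint, get_tensor_map(n_blocks=32), write_tensor)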