This commit is contained in:
M. Yusuf Sarıgöz 2023-08-12 07:25:10 +03:00
commit 186c496fdf
2 changed files with 12 additions and 12 deletions

View File

@ -26,15 +26,15 @@ def get_tensor_map( n_blocks : int):
tensor_map["output"] = mapped_to # llama-pth
# Attention and feed-forward layer blocks
for i in range(0,n_blocks):
# Attention norm 1
mapped_to = "transformer.blocks."+str(i)+".attn_norm_1"
# Attention norm
mapped_to = "transformer.blocks."+str(i)+".attn_norm"
tensor_map["gpt_neox.layers."+str(i)+".input_layernorm"] = mapped_to # gptneox
tensor_map["transformer.h."+str(i)+".ln_1"] = mapped_to # gpt2
tensor_map["transformer.blocks."+str(i)+".norm_1"] = mapped_to # mpt
tensor_map["transformer.h."+str(i)+".input_layernorm"] = mapped_to # falcon7b
tensor_map["transformer.h."+str(i)+".ln_attn"] = mapped_to # falcon40b
tensor_map["model.layers."+str(i)+".input_layernorm"] = mapped_to # llama-hf
tensor_map["layers."+str(i)+".attention_norm"] = mapped_to # llama-pth
tensor_map["transformer.h."+str(i)+".ln_1"] = mapped_to # gpt2
tensor_map["transformer.blocks."+str(i)+".norm_1"] = mapped_to # mpt
tensor_map["transformer.h."+str(i)+".input_layernorm"] = mapped_to # falcon7b
tensor_map["transformer.h."+str(i)+".ln_attn"] = mapped_to # falcon40b
tensor_map["model.layers."+str(i)+".input_layernorm"] = mapped_to # llama-hf
tensor_map["layers."+str(i)+".attention_norm"] = mapped_to # llama-pth
# Attention norm 2
mapped_to = "transformer.blocks."+str(i)+".attn_norm_2"
tensor_map["transformer.h."+str(i)+".ln_mlp"] = mapped_to # falcon40b

View File

@ -565,8 +565,8 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
std::string blocknamestart = "transformer.blocks." + std::to_string(i) + ".";
layer.ln_1_g = get_tensor_ex(ctx, blocknamestart + "attn_norm_1.weight" );
layer.ln_1_b = get_tensor_ex(ctx, blocknamestart + "attn_norm_1.bias" );
layer.ln_1_g = get_tensor_ex(ctx, blocknamestart + "attn_norm.weight" );
layer.ln_1_b = get_tensor_ex(ctx, blocknamestart + "attn_norm.bias" );
layer.c_attn_attn_w = get_tensor_ex(ctx, blocknamestart + "attn_qkv.weight" );
layer.c_attn_attn_b = get_tensor_ex(ctx ,blocknamestart + "attn_qkv.bias" );
@ -584,8 +584,8 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
layer.c_mlp_proj_b = get_tensor_ex(ctx, blocknamestart + "ffn_down.bias" );
// map by name
model.tensors[blocknamestart + "attn_norm_1.weight"] = layer.ln_1_g;
model.tensors[blocknamestart + "attn_norm_1.bias"] = layer.ln_1_b;
model.tensors[blocknamestart + "attn_norm.weight"] = layer.ln_1_g;
model.tensors[blocknamestart + "attn_norm.bias"] = layer.ln_1_b;
model.tensors[blocknamestart + "attn_qkv.weight"] = layer.c_attn_attn_w;
model.tensors[blocknamestart + "attn_qkv.bias"] = layer.c_attn_attn_b;