correct tensor patch

ngxson 2024-07-06 13:29:37 +02:00
parent e9d7b6c05f
commit 4e28ad40a0
2 changed files with 14 additions and 23 deletions

ggml.c

@@ -19339,7 +19339,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
     fprintf(fp, "digraph G {\n");
     fprintf(fp, " newrank = true;\n");
-    fprintf(fp, " rankdir = LR;\n");
+    fprintf(fp, " rankdir = TB;\n");
     for (int i = 0; i < gb->n_nodes; i++) {
         struct ggml_tensor * node = gb->nodes[i];
@@ -19401,7 +19401,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
         }
         fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
-        if (ggml_nelements(node) < 5) {
+        if (ggml_nelements(node) < 5 && node->data != NULL) {
             fprintf(fp, " | (");
             for (int j = 0; j < ggml_nelements(node); j++) {
                 if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {

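Both hunks above are in ggml_graph_dump_dot: the generated DOT graph switches from left-to-right to top-to-bottom layout (rankdir), and small constant leaves are only printed value-by-value when node->data is non-NULL, so the dump no longer reads from a tensor whose data has not been allocated. A minimal standalone sketch of that guard, with a toy tensor type standing in for ggml_tensor (dump_small_tensor is a made-up helper, not ggml code):

    #include <cstdio>
    #include <cstdint>

    // Toy stand-in for a ggml tensor: only the fields the guard cares about.
    struct toy_tensor {
        int64_t n_elements; // what ggml_nelements(node) would return
        float * data;       // may be NULL before any buffer is allocated
    };

    // Print the values of a small tensor, mirroring the guarded branch:
    // skip printing entirely when there is no data to read.
    static void dump_small_tensor(FILE * fp, const toy_tensor & t) {
        if (t.n_elements < 5 && t.data != NULL) {
            fprintf(fp, " | (");
            for (int64_t j = 0; j < t.n_elements; j++) {
                fprintf(fp, "%s%.1e", j ? ", " : "", (double) t.data[j]);
            }
            fprintf(fp, ")");
        }
    }

    int main() {
        float vals[3] = {1.0f, 2.0f, 3.0f};
        toy_tensor allocated   = {3, vals};
        toy_tensor unallocated = {3, NULL};     // e.g. a tensor before upload
        dump_small_tensor(stdout, allocated);   // prints " | (1.0e+00, 2.0e+00, 3.0e+00)"
        dump_small_tensor(stdout, unallocated); // prints nothing instead of crashing
        fprintf(stdout, "\n");
        return 0;
    }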
llama.cpp

@@ -18314,7 +18314,7 @@ static int llama_lora_adapter_init_internal(const struct llama_model & model, co
             if (il == 0) n_tensors_per_layer++;
         }
     }
-    printf("n_tensors_per_layer %d\n", n_tensors_per_layer);
+    // printf("n_tensors_per_layer %d\n", n_tensors_per_layer);
     // count layer buffer types
     std::map<ggml_backend_buffer_type_t, int> buft_tensor_count;
@@ -18363,6 +18363,8 @@ static int llama_lora_adapter_init_internal(const struct llama_model & model, co
             } else {
                 ab_map[name].b = cur;
             }
+        } else {
+            LLAMA_LOG_WARN("%s: discard tensor '%s'\n", __func__, cur->name);
         }
     }
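The added else branch warns about tensors in the adapter file that are not matched into an A/B pair. The visible branch stores the B factor (ab_map[name].b = cur); presumably a matching branch above it stores the A factor under the same base name. A rough sketch of that pairing step, assuming the usual ".lora_a"/".lora_b" name suffixes; lora_weight and ends_with here are illustrative stand-ins, not the llama.cpp types:

    #include <cstdio>
    #include <map>
    #include <string>

    // Illustrative pair of low-rank LoRA factors for one base tensor.
    struct lora_weight {
        std::string a; // name of the lora_a tensor (a ggml_tensor * in llama.cpp)
        std::string b; // name of the lora_b tensor
    };

    static bool ends_with(const std::string & s, const std::string & suffix) {
        return s.size() >= suffix.size() &&
               s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
    }

    int main() {
        // Tensor names as they might appear in an adapter file; the last one
        // belongs to no LoRA pair and is discarded with a warning.
        const char * names[] = {
            "blk.0.attn_q.weight.lora_a",
            "blk.0.attn_q.weight.lora_b",
            "some.unrelated.tensor",
        };

        std::map<std::string, lora_weight> ab_map;
        for (const std::string name : names) {
            std::string base = name;
            if (ends_with(name, ".lora_a")) {
                base.resize(base.size() - 7);   // strip ".lora_a"
                ab_map[base].a = name;
            } else if (ends_with(name, ".lora_b")) {
                base.resize(base.size() - 7);   // strip ".lora_b"
                ab_map[base].b = name;
            } else {
                fprintf(stderr, "discard tensor '%s'\n", name.c_str());
            }
        }

        for (const auto & it : ab_map) {
            printf("%s -> a=%s b=%s\n", it.first.c_str(), it.second.a.c_str(), it.second.b.c_str());
        }
        return 0;
    }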
@@ -18400,14 +18402,14 @@ static int llama_lora_adapter_init_internal(const struct llama_model & model, co
         adapter.bufs.reserve(ctx_map.size());
         for (auto it : ctx_map) {
             ggml_backend_buffer_type_t buft = it.first;
-            ggml_context * ctx = it.second;
-            ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
+            ggml_context * ctx_dev = it.second;
+            ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx_dev, buft);
             if (!buf) {
                 LLAMA_LOG_ERROR("%s: failed to allocate buffer for lora adapter\n", __func__);
                 return -1;
             }
             ggml_backend_buffer_clear(buf, 0);
-            adapter.ctxs.push_back(ctx);
+            adapter.ctxs.push_back(ctx_dev);
             adapter.bufs.push_back(buf);
         }
     }
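Beyond the ctx to ctx_dev rename, this loop uses the usual ggml-backend pattern: tensors are first created in a metadata-only context (no_alloc), then ggml_backend_alloc_ctx_tensors_from_buft() places all of them in one backend buffer of the chosen buffer type, and ggml_backend_buffer_clear() zero-fills it. A minimal sketch of that pattern against the public ggml API (CPU buffer type, made-up tensor shapes):

    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
        // Context that only holds tensor metadata; the data is allocated
        // later in a backend buffer, which is what no_alloc = true means.
        ggml_init_params params = {
            /*.mem_size   =*/ 8 * ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true,
        };
        ggml_context * ctx_dev = ggml_init(params);

        // A couple of placeholder adapter tensors (shapes are made up).
        ggml_new_tensor_2d(ctx_dev, GGML_TYPE_F32, 64, 8);
        ggml_new_tensor_2d(ctx_dev, GGML_TYPE_F32, 8, 64);

        // Allocate every tensor of the context in one buffer of the given
        // buffer type, then zero it, as the patched loop does.
        ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx_dev, buft);
        if (!buf) {
            fprintf(stderr, "failed to allocate buffer\n");
            return 1;
        }
        ggml_backend_buffer_clear(buf, 0);

        ggml_backend_buffer_free(buf);
        ggml_free(ctx_dev);
        return 0;
    }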
@@ -18424,8 +18426,8 @@ static int llama_lora_adapter_init_internal(const struct llama_model & model, co
         }
         gguf_file.seek(offs, SEEK_SET);
         gguf_file.read_raw(read_buf.data(), size);
-        // LLAMA_LOG_INFO("%s: %s size=%ld\n", __func__, orig->name, size);
-        return ggml_backend_tensor_set(dev, read_buf.data(), 0, size);
+        // LLAMA_LOG_INFO("%s: %s size=%ld\n", __func__, dev->name, size);
+        ggml_backend_tensor_set(dev, read_buf.data(), 0, size);
     };
     for (auto & it : adapter.ab_map) {
         auto orig = ab_map[it.first];
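In this helper the commented log now prints dev->name (the destination tensor) instead of orig->name, and the stray return in front of ggml_backend_tensor_set() is dropped, since that function returns void. The underlying pattern is: seek to the tensor's offset in the GGUF file, read size bytes into a staging buffer, then copy them into the backend tensor. A self-contained sketch of the upload step, with an in-memory staging buffer standing in for the file read (CPU buffer type assumed):

    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        ggml_init_params params = {
            /*.mem_size   =*/ 2 * ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true,
        };
        ggml_context * ctx = ggml_init(params);
        ggml_tensor * dev = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

        ggml_backend_buffer_t buf =
            ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_cpu_buffer_type());

        // Staging buffer standing in for the bytes read from the adapter
        // GGUF file at the tensor's offset; here it is just a fixed pattern.
        std::vector<uint8_t> read_buf(ggml_nbytes(dev), 0x3f);

        // Upload the raw bytes into the backend tensor; the call returns void.
        ggml_backend_tensor_set(dev, read_buf.data(), 0, ggml_nbytes(dev));

        printf("uploaded %zu bytes into '%s'\n", ggml_nbytes(dev), dev->name);

        ggml_backend_buffer_free(buf);
        ggml_free(ctx);
        return 0;
    }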
@@ -18461,6 +18463,7 @@ static int32_t llama_lora_restore_tensors(struct llama_context & lctx) {
             model.layers[il] = model.orig_layers[il]; // copy
         }
     }
+    return 0;
 }
 static int32_t llama_lora_patch_tensors(struct llama_context & lctx, struct ggml_context * ctx_build) {
@@ -18498,8 +18501,8 @@ static int32_t llama_lora_patch_tensors(struct llama_context & lctx, struct ggml
             cur = ggml_add(ctx_build, cur, *tensor);
             // TODO: scale
             ggml_format_name(cur, "%s.merged", name.c_str());
-            // LLAMA_LOG_INFO("LORA %s\n", cur->name);
-            tensor = &cur;
+            // LLAMA_LOG_INFO("LORA %p %s\n", cur, cur->name);
+            *tensor = cur;
         }
     };
     for (auto adapter : lctx.lora_adapters) {
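This hunk is most likely the fix the commit title refers to: patch_tensor receives the address of a model weight (a struct ggml_tensor **), builds the merged tensor cur with ggml_add, and must write it back through that pointer. The old tensor = &cur only rebound the local parameter, so the caller kept using the unmerged weight; *tensor = cur actually replaces it. A small illustration of the difference with toy types (patch_broken and patch_fixed are hypothetical, not the llama.cpp helper):

    #include <cstdio>

    struct tensor { const char * name; };

    // Broken variant: reassigning the parameter only changes the local copy
    // of the pointer-to-pointer, so the caller still sees the old tensor.
    static void patch_broken(tensor ** t, tensor * merged) {
        t = &merged;              // no effect outside this function
    }

    // Fixed variant, matching the "*tensor = cur;" line in the diff: write
    // through the pointer so the caller's field points at the merged tensor.
    static void patch_fixed(tensor ** t, tensor * merged) {
        *t = merged;
    }

    int main() {
        tensor base   = { "wq.weight" };
        tensor merged = { "wq.weight.merged" };

        tensor * layer_wq = &base;            // stands in for layer.wq
        patch_broken(&layer_wq, &merged);
        printf("after broken patch: %s\n", layer_wq->name);  // still wq.weight

        patch_fixed(&layer_wq, &merged);
        printf("after fixed patch:  %s\n", layer_wq->name);  // wq.weight.merged
        return 0;
    }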
@@ -18541,14 +18544,6 @@ static int32_t llama_lora_patch_tensors(struct llama_context & lctx, struct ggml
         patch_tensor(adapter, &layer.wq_b);
         patch_tensor(adapter, &layer.wkv_a_mqa);
         patch_tensor(adapter, &layer.wkv_b);
-        patch_tensor(adapter, &layer.wq_cross);
-        patch_tensor(adapter, &layer.wk_cross);
-        patch_tensor(adapter, &layer.wv_cross);
-        patch_tensor(adapter, &layer.wo_cross);
-        patch_tensor(adapter, &layer.wq_enc);
-        patch_tensor(adapter, &layer.wk_enc);
-        patch_tensor(adapter, &layer.wv_enc);
-        patch_tensor(adapter, &layer.wo_enc);
         patch_tensor(adapter, &layer.bq);
         patch_tensor(adapter, &layer.bk);
@@ -18556,10 +18551,6 @@ static int32_t llama_lora_patch_tensors(struct llama_context & lctx, struct ggml
         patch_tensor(adapter, &layer.bo);
         patch_tensor(adapter, &layer.bqkv);
         patch_tensor(adapter, &layer.attn_rel_b);
-        patch_tensor(adapter, &layer.attn_rel_b_enc);
-        patch_tensor(adapter, &layer.attn_rel_b_cross);
         patch_tensor(adapter, &layer.ffn_norm);
         patch_tensor(adapter, &layer.ffn_norm_b);
         patch_tensor(adapter, &layer.ffn_post_norm);