From d0a7bf9382782368b57e68585b8926aa875a2f95 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 18 Sep 2024 21:20:21 +0300
Subject: [PATCH] llama : add classification head (wip) [no ci]

---
 common/arg.cpp |  2 +-
 src/llama.cpp  | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 6880117ed..885e982bb 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -391,7 +391,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
         [](gpt_params & params) {
             params.verbose_prompt = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN}));
+    ));
     add_opt(llama_arg(
         {"--no-display-prompt"},
         format("don't print prompt at generation (default: %s)", !params.display_prompt ? "true" : "false"),
diff --git a/src/llama.cpp b/src/llama.cpp
index ab0328ce8..86731bf68 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -11455,8 +11455,20 @@ struct llm_build_context {
             inpL = cur;
         }
 
-        // final output
         cur = inpL;
+
+        // classification head
+        // https://github.com/huggingface/transformers/blob/5af7d41e49bbfc8319f462eb45253dcb3863dfb7/src/transformers/models/roberta/modeling_roberta.py#L1566
+        // TODO: become pooling layer?
+        if (model.cls) {
+            cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.cls, cur), model.cls_b);
+
+            cur = ggml_tanh(ctx0, cur);
+
+            cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.cls_out, cur), model.cls_out_b);
+            // TODO: cur is now a scalar - what to do?
+        }
+
         cb(cur, "result_embd", -1);
 
         ggml_build_forward_expand(gf, cur);
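
Note: the new head follows the referenced RobertaClassificationHead: a dense layer with tanh activation over the embedding, followed by a linear projection to a single score (hence the "cur is now a scalar" TODO). Below is a minimal standalone sketch of the same dense -> tanh -> out_proj op sequence using the public ggml API (ggml_init, ggml_new_graph, ggml_graph_compute_with_ctx). It is not part of the patch: the tensor names only mirror model.cls / model.cls_b / model.cls_out / model.cls_out_b, and the toy size and constant-filled weights are placeholders chosen just to exercise the graph.

// sketch.cpp - standalone classification-head graph, assumptions as noted above
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx0 = ggml_init(ip);

    const int n_embd = 4; // toy hidden size; real models use e.g. 768

    // hypothetical stand-ins for the model weights loaded in the patch
    struct ggml_tensor * cls       = ggml_set_f32(ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, n_embd), 0.1f);
    struct ggml_tensor * cls_b     = ggml_set_f32(ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_embd),         0.0f);
    struct ggml_tensor * cls_out   = ggml_set_f32(ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, 1),      0.2f);
    struct ggml_tensor * cls_out_b = ggml_set_f32(ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1),              0.0f);

    // stand-in for `cur` after the final layer (the embedding fed to the head)
    struct ggml_tensor * cur = ggml_set_f32(ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_embd), 1.0f);

    // same op sequence as the patch: dense + bias, tanh, output projection + bias
    cur = ggml_add(ctx0, ggml_mul_mat(ctx0, cls, cur), cls_b);
    cur = ggml_tanh(ctx0, cur);
    cur = ggml_add(ctx0, ggml_mul_mat(ctx0, cls_out, cur), cls_out_b); // one element left

    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
    ggml_build_forward_expand(gf, cur);
    ggml_graph_compute_with_ctx(ctx0, gf, /*n_threads =*/ 1);

    // the "scalar" the final TODO refers to: a single classification score
    printf("cls score = %f\n", (double) ggml_get_f32_1d(cur, 0));

    ggml_free(ctx0);
    return 0;
}

With these placeholder constants the sketch prints a single score of about 0.30 (4 * 0.2 * tanh(0.4)). That lone value is what the two TODOs are about: whether the head should be exposed as a new pooling type, and how a one-element result should be returned through the "result_embd" output.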