ggerganov/llama.cpp (mirror of https://github.com/ggerganov/llama.cpp.git)

commit 7f59af52a9
parent 1b0ff2cf6a

    Steer with inpSA instead of with inpL

    Signed-off-by: Henri Vasserman <henv@hot.ee>
@@ -176,28 +176,27 @@ int main(int argc, char ** argv) {
     if (!params.steering_add.empty() || !params.steering_sub.empty())
     {
         params.steering_add.insert(0, 1, ' ');
         params.steering_sub.insert(0, 1, ' ');
 
         auto add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
         auto sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
 
-        //if (add_tokens.size() != sub_tokens.size()) {
-        //    while (add_tokens.size() < sub_tokens.size()) {
-        //        add_tokens.push_back(llama_token_nl());
-        //    }
-        //    while (sub_tokens.size() < add_tokens.size()) {
-        //        sub_tokens.push_back(llama_token_nl());
-        //    }
-        //}
-        //const int N = embd_inp.size();
+        if (add_tokens.size() != sub_tokens.size()) {
+            while (add_tokens.size() < sub_tokens.size()) {
+                add_tokens.push_back(llama_token_nl());
+            }
+            while (sub_tokens.size() < add_tokens.size()) {
+                sub_tokens.push_back(llama_token_nl());
+            }
+        }
 
         llama_set_steering_write(ctx, params.steering_source, +1.0f);
         llama_eval(ctx, add_tokens.data(), std::min((int)add_tokens.size(), n_ctx), 0, params.n_threads);
 
-        llama_set_steering_write(ctx, params.steering_layer, -1.0f);
+        llama_set_steering_write(ctx, params.steering_source, -1.0f);
         llama_eval(ctx, sub_tokens.data(), std::min((int)sub_tokens.size(), n_ctx), 0, params.n_threads);
 
         llama_set_steering_read(ctx, params.steering_layer, params.steering_mul);
-
+        std::cout << "Steering: `" << params.steering_add << "` - `" << params.steering_sub << "` * " << params.steering_mul << "\n";
     }
 
     // debug message about similarity of saved session, if applicable
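A note on the hunk above: the newly enabled block pads the shorter of the two token lists with newline tokens so both prompts drive forward passes of equal length, and the steering_layer -> steering_source fix makes the negative pass capture activations from the same source layer as the positive pass. Stripped of the API plumbing, the arithmetic the write and read passes perform looks roughly like the sketch below. This is a conceptual illustration only; accumulate_activations and apply_steering are hypothetical names, not functions in this branch.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Write pass: each llama_eval in write mode folds that prompt's hidden
    // activations into the steering buffer with the given sign, so after the
    // +1.0f and -1.0f passes the buffer holds roughly
    // acts(steering_add) - acts(steering_sub).
    static void accumulate_activations(std::vector<float> & steer,
                                       const std::vector<float> & acts,
                                       float sign) {
        if (steer.size() < acts.size()) {
            steer.resize(acts.size(), 0.0f);
        }
        for (size_t i = 0; i < acts.size(); ++i) {
            steer[i] += sign * acts[i];
        }
    }

    // Read pass: during generation the buffer is added back into the hidden
    // state at steering_layer, scaled by steering_mul.
    static void apply_steering(std::vector<float> & hidden,
                               const std::vector<float> & steer,
                               float steering_mul) {
        const size_t n = std::min(hidden.size(), steer.size());
        for (size_t i = 0; i < n; ++i) {
            hidden[i] += steering_mul * steer[i];
        }
    }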
@@ -32,6 +32,7 @@
 #include <mutex>
 #include <sstream>
 #include <numeric>
+#include <iostream>
 
 #define LLAMA_USE_SCRATCH
 #define LLAMA_MAX_SCRATCH_BUFFERS 16
@@ -1187,8 +1188,8 @@ static bool llama_eval_internal(
                         ggml_add(ctx0, ggml_scale(ctx0, inpL, scal), steer), steer));
                     break;
                 }
-
-                inpL = ggml_add(ctx0, ggml_scale(ctx0, steer, scal), inpL);
+                // std::cout << "\nAdding steering vector to inpL " << il << "\n";
+                inpSA = ggml_add(ctx0, ggml_scale(ctx0, steer, scal), inpSA);
             }
 
         // norm
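Why inpSA rather than inpL: assuming the usual llama.cpp layer layout, inpSA is bound to inpL at the top of each layer as the copy kept for the residual connection around attention. Adding the steering vector to inpL after that point only perturbs the normed input to that single layer's attention, and the offset is dropped again at the residual add; adding it to inpSA instead injects it into the residual stream, where it persists into every later layer. A rough, runnable sketch of that data flow, in plain C++ with identity stand-ins for the real ggml ops (not this branch's code):

    #include <cstddef>
    #include <vector>

    using Vec = std::vector<float>;

    // Identity stand-ins keep the sketch self-contained; the real code uses ggml.
    static Vec rms_norm(const Vec & x)  { return x; }
    static Vec attention(const Vec & x) { return x; }

    static Vec add_scaled(Vec a, const Vec & b, float s) {
        for (size_t i = 0; i < a.size() && i < b.size(); ++i) {
            a[i] += s * b[i];
        }
        return a;
    }

    // One layer with steering applied as in this commit.
    static Vec layer(const Vec & input, const Vec & steer, float scal, bool steer_here) {
        Vec inpL  = input;
        Vec inpSA = inpL;  // copy saved for the residual connection around attention
        if (steer_here) {
            inpSA = add_scaled(inpSA, steer, scal);  // new: offset enters the residual branch
            // old: inpL = add_scaled(inpL, steer, scal);  // offset only reached the attention input
        }
        Vec cur = attention(rms_norm(inpL));  // attention still sees the unsteered input
        return add_scaled(cur, inpSA, 1.0f);  // residual add carries the offset into later layers
    }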