cleanup and stuff

Henri Vasserman 2023-05-16 15:16:00 +03:00
parent 021e6d9944
commit 8388aaa604
4 changed files with 48 additions and 37 deletions

examples/common.cpp View File

@@ -362,12 +362,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
break;
}
params.steering_mul = std::stof(argv[i]);
} else if (arg == "--steering-lyr") {
} else if (arg == "--steering-layer") {
if (++i >= argc) {
invalid_param = true;
break;
}
- params.steering_lyr = std::stoi(argv[i]);
+ params.steering_layer = std::stoi(argv[i]);
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
gpt_print_usage(argc, argv, default_params);
@@ -454,6 +454,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
}
fprintf(stderr, " -ngl N, --n-gpu-layers N\n");
fprintf(stderr, " number of layers to store in VRAM\n");
fprintf(stderr, " --steering-add add positive steering prompt\n");
fprintf(stderr, " --steering-sub add negativ steering prompt\n");
fprintf(stderr, " --steering-mul set steering strength (negative is reverse, default %.1f)\n", params.steering_mul);
fprintf(stderr, " --steering-layer set layer for steering (default %d)\n", params.steering_layer);
fprintf(stderr, " --mtest compute maximum memory usage\n");
fprintf(stderr, " --verbose-prompt print prompt before generation\n");
fprintf(stderr, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");

examples/common.h View File

@@ -73,10 +73,10 @@ struct gpt_params {
bool mem_test = false; // compute maximum memory usage
bool verbose_prompt = false; // print prompt tokens before generation
- std::string steering_add = "";
- std::string steering_sub = "";
+ std::string steering_add;
+ std::string steering_sub;
float steering_mul = 1.0f;
- int steering_lyr = 20;
+ int steering_layer = 15;
};
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

examples/main/main.cpp View File

@@ -136,28 +136,6 @@ int main(int argc, char ** argv) {
return 0;
}
- if (params.steering_add.size() || params.steering_sub.size())
- {
- auto steering_add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
- auto steering_sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
- if (steering_add_tokens.size() != steering_sub_tokens.size()) {
- llama_token space;
- llama_tokenize(ctx, " ", &space, 1, 0);
- while (steering_add_tokens.size() < steering_sub_tokens.size()) steering_add_tokens.push_back(space);
- while (steering_sub_tokens.size() < steering_add_tokens.size()) steering_sub_tokens.push_back(space);
- }
- llama_set_steering_write(ctx, params.steering_lyr, params.steering_mul/2);
- llama_eval(ctx, steering_add_tokens.data(), std::min((int)steering_add_tokens.size(), params.n_ctx), 0, params.n_threads);
- llama_set_steering_write(ctx, params.steering_lyr, -params.steering_mul/2);
- llama_eval(ctx, steering_sub_tokens.data(), std::min((int)steering_sub_tokens.size(), params.n_ctx), 0, params.n_threads);
- llama_set_steering_read(ctx, params.steering_lyr, 1);
- }
// Add a space in front of the first character to match OG llama tokenizer behavior
params.prompt.insert(0, 1, ' ');
@@ -196,6 +174,32 @@ int main(int argc, char ** argv) {
return 1;
}
+ if (!params.steering_add.empty() || !params.steering_sub.empty())
+ {
+ params.steering_add.insert(0, 1, ' ');
+ params.steering_sub.insert(0, 1, ' ');
+ auto add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
+ auto sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
+ //if (add_tokens.size() != sub_tokens.size()) {
+ // while (add_tokens.size() < sub_tokens.size()) {
+ // add_tokens.push_back(llama_token_nl());
+ // }
+ // while (sub_tokens.size() < add_tokens.size()) {
+ // sub_tokens.push_back(llama_token_nl());
+ // }
+ //}
+ //const int N = embd_inp.size();
+ llama_set_steering_write(ctx, params.steering_layer, +1.0f);
+ llama_eval(ctx, add_tokens.data(), std::min((int)add_tokens.size(), n_ctx), 0, params.n_threads);
+ llama_set_steering_write(ctx, params.steering_layer, -1.0f);
+ llama_eval(ctx, sub_tokens.data(), std::min((int)sub_tokens.size(), n_ctx), 0, params.n_threads);
+ llama_set_steering_read(ctx, params.steering_layer, params.steering_mul);
+ }
// debug message about similarity of saved session, if applicable
size_t n_matching_session_tokens = 0;
if (session_tokens.size()) {
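
The added block builds the steering vector with two extra evaluations before generation: the "+1.0f" write pass accumulates the layer activations of the positive prompt, the "-1.0f" pass subtracts those of the negative prompt, and the later read pass adds the stored difference, scaled by --steering-mul, back into the same layer. A minimal self-contained sketch of that arithmetic (toy sizes and values, not llama.cpp code; the real implementation keeps one n_embd-sized row per context position):

#include <cstdio>
#include <vector>

int main() {
    const int n_embd = 4;                          // toy embedding width
    const float mul  = 2.0f;                       // --steering-mul
    std::vector<float> steer(n_embd, 0.0f);        // steering vector
    std::vector<float> h_add = {1, 2, 3, 4};       // layer activations of the --steering-add prompt
    std::vector<float> h_sub = {0, 1, 1, 2};       // layer activations of the --steering-sub prompt
    std::vector<float> h     = {5, 5, 5, 5};       // activations seen during normal generation

    // write passes: steer += (+1) * h_add, then steer += (-1) * h_sub
    for (int i = 0; i < n_embd; ++i) steer[i] += h_add[i];
    for (int i = 0; i < n_embd; ++i) steer[i] -= h_sub[i];

    // read pass: h = h + mul * steer at the steering layer
    for (int i = 0; i < n_embd; ++i) h[i] += mul * steer[i];

    for (int i = 0; i < n_embd; ++i) printf("%.1f ", h[i]);   // prints 7.0 7.0 9.0 9.0
    printf("\n");
    return 0;
}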

llama.cpp View File

@@ -287,6 +287,9 @@ void llama_set_steering_read(struct llama_context * ctx, int layer, float mul) {
ctx->steering_mode = STEERING_READ;
ctx->steering_mul = mul;
ctx->steering_layer = layer;
+ //FILE* steeringbin = fopen("steering.bin", "wb");
+ //fwrite(ctx->steering_vector.data(), sizeof(float), ctx->steering_vector.size(), steeringbin);
+ //fclose(steeringbin);
}
template <typename T>
@@ -1163,8 +1166,9 @@ static bool llama_eval_internal(
struct ggml_tensor * steer;
if (lctx.steering_mode != STEERING_OFF) {
- steer = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_ctx, n_embd);
- memcpy(steer->data, lctx.steering_vector.data(), ggml_nbytes(steer));
+ steer = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
+ //steer->data = lctx.steering_vector.data() + n_past * n_embd * sizeof(float);
+ memcpy(steer->data, lctx.steering_vector.data() + n_past * n_embd * sizeof(float), ggml_nbytes(steer));
}
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
@@ -1177,15 +1181,14 @@ static bool llama_eval_internal(
lctx.use_buf(ctx0, 0);
if (lctx.steering_mode != STEERING_OFF && il == lctx.steering_layer) {
- steer->data = lctx.steering_vector.data();
- struct ggml_tensor * src = ggml_scale(ctx0, inpL, ggml_new_f32(ctx0, lctx.steering_mul));
- struct ggml_tensor * dst = ggml_view_2d(ctx0, steer, n_embd, N, n_embd * sizeof(float), n_past * n_embd * sizeof(float));
+ struct ggml_tensor * scal = ggml_new_f32(ctx0, lctx.steering_mul);
if (lctx.steering_mode == STEERING_WRITE) {
- ggml_build_forward_expand(&gf, ggml_cpy(ctx0, ggml_add(ctx0, src, dst), dst));
- } else {
- inpL = src;
+ ggml_build_forward_expand(&gf, ggml_cpy(ctx0,
+ ggml_add(ctx0, ggml_scale(ctx0, inpL, scal), steer), steer));
+ break;
}
+ inpL = ggml_add(ctx0, ggml_scale(ctx0, steer, scal), inpL);
}
// norm
@@ -1403,7 +1406,7 @@ static bool llama_eval_internal(
if (lctx.steering_mode == STEERING_WRITE) {
- memcpy(lctx.steering_vector.data(), steer->data, ggml_nbytes(steer));
+ memcpy(lctx.steering_vector.data() + n_past * n_embd * sizeof(float), steer->data, ggml_nbytes(steer));
}