mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 20:04:35 +00:00
fixed tokens probs
This commit is contained in:
parent
9d98cdda2c
commit
de35b47908
@ -3,7 +3,7 @@
|
|||||||
#include "build-info.h"
|
#include "build-info.h"
|
||||||
#include "grammar-parser.h"
|
#include "grammar-parser.h"
|
||||||
|
|
||||||
// #define SERVER_MULTIMODAL_SUPPORT
|
#define SERVER_MULTIMODAL_SUPPORT
|
||||||
|
|
||||||
#ifdef SERVER_MULTIMODAL_SUPPORT
|
#ifdef SERVER_MULTIMODAL_SUPPORT
|
||||||
#include "../llava/clip.h"
|
#include "../llava/clip.h"
|
||||||
@ -746,10 +746,6 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
// add the token to slot queue and cache
|
// add the token to slot queue and cache
|
||||||
slot.addTokenString(result);
|
slot.addTokenString(result);
|
||||||
if (slot.sparams.n_probs > 0)
|
|
||||||
{
|
|
||||||
slot.generated_token_probs.push_back(result);
|
|
||||||
}
|
|
||||||
if (slot.multibyte_pending > 0)
|
if (slot.multibyte_pending > 0)
|
||||||
{
|
{
|
||||||
slot.multibyte_pending -= token_str.size();
|
slot.multibyte_pending -= token_str.size();
|
||||||
@ -1009,13 +1005,13 @@ struct llama_server_context
|
|||||||
#ifdef SERVER_MULTIMODAL_SUPPORT
|
#ifdef SERVER_MULTIMODAL_SUPPORT
|
||||||
std::vector<llama_token> preffix_tokens = ingest_image ? tokenize(slot.params.input_prefix, true) : prompt_tokens;
|
std::vector<llama_token> preffix_tokens = ingest_image ? tokenize(slot.params.input_prefix, true) : prompt_tokens;
|
||||||
for (; slot.n_past < preffix_tokens.size(); ++slot.n_past) {
|
for (; slot.n_past < preffix_tokens.size(); ++slot.n_past) {
|
||||||
printf(llama_token_to_piece(ctx, preffix_tokens[slot.n_past]).c_str());
|
|
||||||
batch.token [batch.n_tokens] = preffix_tokens[slot.n_past];
|
batch.token [batch.n_tokens] = preffix_tokens[slot.n_past];
|
||||||
batch.pos [batch.n_tokens] = slot.n_past + num_tokens_system;
|
batch.pos [batch.n_tokens] = slot.n_past + num_tokens_system;
|
||||||
batch.seq_id[batch.n_tokens] = slot.id;
|
batch.seq_id[batch.n_tokens] = slot.id;
|
||||||
batch.logits[batch.n_tokens] = false;
|
batch.logits[batch.n_tokens] = false;
|
||||||
batch.n_tokens += 1;
|
batch.n_tokens += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(ingest_image) {
|
if(ingest_image) {
|
||||||
// process preffix prompt
|
// process preffix prompt
|
||||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
|
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
|
||||||
@ -1035,8 +1031,6 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("\nEvaluated preffix prompt: %i\n", slot.n_past);
|
|
||||||
|
|
||||||
// process image
|
// process image
|
||||||
for (int i = 0; i < slot.image_tokens; i += n_batch) {
|
for (int i = 0; i < slot.image_tokens; i += n_batch) {
|
||||||
int n_eval = slot.image_tokens - i;
|
int n_eval = slot.image_tokens - i;
|
||||||
@ -1050,13 +1044,11 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
slot.n_past += n_eval;
|
slot.n_past += n_eval;
|
||||||
}
|
}
|
||||||
printf("Evaluated image embedding: %i\n", slot.n_past);
|
|
||||||
|
|
||||||
// process suffix prompt
|
// process suffix prompt
|
||||||
batch.n_tokens = 0;
|
batch.n_tokens = 0;
|
||||||
std::vector<llama_token> suffix_tokens = tokenize(slot.params.input_suffix, true);
|
std::vector<llama_token> suffix_tokens = tokenize(slot.params.input_suffix, true);
|
||||||
for (int i = 0; i < suffix_tokens.size(); ++i) {
|
for (int i = 0; i < suffix_tokens.size(); ++i) {
|
||||||
printf(llama_token_to_piece(ctx, suffix_tokens[i]).c_str());
|
|
||||||
batch.token [batch.n_tokens] = suffix_tokens[i];
|
batch.token [batch.n_tokens] = suffix_tokens[i];
|
||||||
batch.pos [batch.n_tokens] = slot.n_past;
|
batch.pos [batch.n_tokens] = slot.n_past;
|
||||||
batch.seq_id[batch.n_tokens] = slot.id;
|
batch.seq_id[batch.n_tokens] = slot.id;
|
||||||
@ -1064,7 +1056,6 @@ struct llama_server_context
|
|||||||
slot.n_past += 1;
|
slot.n_past += 1;
|
||||||
batch.n_tokens += 1;
|
batch.n_tokens += 1;
|
||||||
}
|
}
|
||||||
printf("\nEvaluated suffix prompt: %i\n", slot.n_past);
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (; slot.n_past < prompt_tokens.size(); ++slot.n_past) {
|
for (; slot.n_past < prompt_tokens.size(); ++slot.n_past) {
|
||||||
|
Loading…
Reference in New Issue
Block a user