mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
We could use std::unordered_map over std::map (#305)
* Improve performance by changing std::map to std::unordered_map, and std::map<id, token> id_to_token to std::vector<token> id_to_token.
* Fix last commit: in gpt_vocab_init, add vocab.id_to_token.resize(vocab.token_to_id.size());
* Removed include <map>.
* Nest struct token_score inside gpt_vocab.
* Renamed token to tok.
This commit is contained in:
parent
89d5d90f3b
commit
353ec251a4
18
main.cpp
18
main.cpp
@ -9,7 +9,6 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <map>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -69,7 +68,7 @@ void set_console_state(console_state new_st)
|
|||||||
static const int EOS_TOKEN_ID = 2;
|
static const int EOS_TOKEN_ID = 2;
|
||||||
|
|
||||||
// determine number of model parts based on the dimension
|
// determine number of model parts based on the dimension
|
||||||
static const std::map<int, int> LLAMA_N_PARTS = {
|
static const std::unordered_map<int, int> LLAMA_N_PARTS = {
|
||||||
{ 4096, 1 },
|
{ 4096, 1 },
|
||||||
{ 5120, 2 },
|
{ 5120, 2 },
|
||||||
{ 6656, 4 },
|
{ 6656, 4 },
|
||||||
@ -123,7 +122,7 @@ struct llama_model {
|
|||||||
|
|
||||||
//
|
//
|
||||||
struct ggml_context * ctx;
|
struct ggml_context * ctx;
|
||||||
std::map<std::string, struct ggml_tensor *> tensors;
|
std::unordered_map<std::string, struct ggml_tensor *> tensors;
|
||||||
};
|
};
|
||||||
|
|
||||||
// load the model's weights from a file
|
// load the model's weights from a file
|
||||||
@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
|
|||||||
// load vocab
|
// load vocab
|
||||||
{
|
{
|
||||||
std::string word;
|
std::string word;
|
||||||
|
vocab.id_to_token.resize(model.hparams.n_vocab);
|
||||||
std::vector<char> tmp(64);
|
std::vector<char> tmp(64);
|
||||||
|
|
||||||
for (int i = 0; i < model.hparams.n_vocab; i++) {
|
for (int i = 0; i < model.hparams.n_vocab; i++) {
|
||||||
@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
|
|||||||
fin.read((char *) &score, sizeof(score));
|
fin.read((char *) &score, sizeof(score));
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
|
||||||
vocab.score[i] = score;
|
auto &tok_score = vocab.id_to_token[i];
|
||||||
|
tok_score.tok = word;
|
||||||
|
tok_score.score = score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) {
|
|||||||
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
||||||
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
||||||
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
||||||
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
|
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
|
||||||
}
|
}
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
if (params.interactive) {
|
if (params.interactive) {
|
||||||
@ -1154,7 +1156,7 @@ int main(int argc, char ** argv) {
|
|||||||
// display text
|
// display text
|
||||||
if (!input_noecho) {
|
if (!input_noecho) {
|
||||||
for (auto id : embd) {
|
for (auto id : embd) {
|
||||||
printf("%s", vocab.id_to_token[id].c_str());
|
printf("%s", vocab.id_to_token[id].tok.c_str());
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
@ -1169,7 +1171,7 @@ int main(int argc, char ** argv) {
|
|||||||
// check for reverse prompt
|
// check for reverse prompt
|
||||||
std::string last_output;
|
std::string last_output;
|
||||||
for (auto id : last_n_tokens) {
|
for (auto id : last_n_tokens) {
|
||||||
last_output += vocab.id_to_token[id];
|
last_output += vocab.id_to_token[id].tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if each of the reverse prompts appears at the end of the output.
|
// Check if each of the reverse prompts appears at the end of the output.
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <map>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
@ -130,6 +129,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string word;
|
std::string word;
|
||||||
|
vocab.id_to_token.resize(n_vocab);
|
||||||
for (int i = 0; i < n_vocab; i++) {
|
for (int i = 0; i < n_vocab; i++) {
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
finp.read ((char *) &len, sizeof(len));
|
finp.read ((char *) &len, sizeof(len));
|
||||||
@ -144,8 +144,10 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
|
|||||||
fout.write((char *) &score, sizeof(score));
|
fout.write((char *) &score, sizeof(score));
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
|
||||||
vocab.score[i] = score;
|
auto &tok_score = vocab.id_to_token[i];
|
||||||
|
tok_score.tok = word;
|
||||||
|
tok_score.score = score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
20
utils.cpp
20
utils.cpp
@ -155,8 +155,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<std::string, int32_t> json_parse(const std::string & fname) {
|
std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
|
||||||
std::map<std::string, int32_t> result;
|
std::unordered_map<std::string, int32_t> result;
|
||||||
|
|
||||||
// read file into string
|
// read file into string
|
||||||
std::string json;
|
std::string json;
|
||||||
@ -360,16 +360,16 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto score = vocab_.score.find((*token).second);
|
if (static_cast<size_t>((*token).second) >= vocab_.id_to_token.size()) {
|
||||||
|
|
||||||
if (score == vocab_.score.end()) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto &tok_score = vocab_.id_to_token[(*token).second];
|
||||||
|
|
||||||
llama_sp_bigram bigram;
|
llama_sp_bigram bigram;
|
||||||
bigram.left = left;
|
bigram.left = left;
|
||||||
bigram.right = right;
|
bigram.right = right;
|
||||||
bigram.score = (*score).second;
|
bigram.score = tok_score.score;
|
||||||
bigram.size = text.size();
|
bigram.size = text.size();
|
||||||
work_queue_.push(bigram);
|
work_queue_.push(bigram);
|
||||||
}
|
}
|
||||||
@ -393,6 +393,8 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
|
|||||||
std::string word;
|
std::string word;
|
||||||
std::vector<char> tmp(64);
|
std::vector<char> tmp(64);
|
||||||
|
|
||||||
|
vocab.id_to_token.resize(n_vocab);
|
||||||
|
|
||||||
for (int i = 0; i < n_vocab; i++) {
|
for (int i = 0; i < n_vocab; i++) {
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
fin.read((char *) &len, sizeof(len));
|
fin.read((char *) &len, sizeof(len));
|
||||||
@ -410,8 +412,10 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
|
|||||||
fin.read((char *) &score, sizeof(score));
|
fin.read((char *) &score, sizeof(score));
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
|
||||||
vocab.score[i] = score;
|
auto &tok_score = vocab.id_to_token[i];
|
||||||
|
tok_score.tok = word;
|
||||||
|
tok_score.score = score;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
14
utils.h
14
utils.h
@ -3,7 +3,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <random>
|
#include <random>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
@ -65,15 +65,19 @@ struct llama_vocab {
|
|||||||
using id = int32_t;
|
using id = int32_t;
|
||||||
using token = std::string;
|
using token = std::string;
|
||||||
|
|
||||||
std::map<token, id> token_to_id;
|
struct token_score {
|
||||||
std::map<id, token> id_to_token;
|
token tok;
|
||||||
std::map<id, float> score;
|
float score;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::unordered_map<token, id> token_to_id;
|
||||||
|
std::vector<token_score> id_to_token;
|
||||||
};
|
};
|
||||||
|
|
||||||
void replace(std::string & str, const std::string & needle, const std::string & replacement);
|
void replace(std::string & str, const std::string & needle, const std::string & replacement);
|
||||||
|
|
||||||
// poor-man's JSON parsing
|
// poor-man's JSON parsing
|
||||||
std::map<std::string, int32_t> json_parse(const std::string & fname);
|
std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
|
||||||
|
|
||||||
// TODO: temporary until #77 is merged, need this now for some tokenizer tests
|
// TODO: temporary until #77 is merged, need this now for some tokenizer tests
|
||||||
bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
|
bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
|
||||||
|
Loading…
Reference in New Issue
Block a user