Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-12 19:50:17 +00:00)
Remove unused data and add fixes (#5154)
* Remove unused data and add fixes
* Add missing file
* Address review comments
* Replace the scope of vq allocation
This commit is contained in:
parent ec903c0341
commit 35a2ee9143
@@ -13,6 +13,7 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_
|
|||||||
// will be empty (default) if there are parse errors
|
// will be empty (default) if there are parse errors
|
||||||
if (result->parsed_grammar.rules.empty()) {
|
if (result->parsed_grammar.rules.empty()) {
|
||||||
fprintf(stderr, "%s: failed to parse grammar\n", __func__);
|
fprintf(stderr, "%s: failed to parse grammar\n", __func__);
|
||||||
|
delete result;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -241,7 +241,7 @@ int main(int argc, char ** argv) {
|
|||||||
LOG("add_bos: %d\n", add_bos);
|
LOG("add_bos: %d\n", add_bos);
|
||||||
|
|
||||||
bool suff_rm_leading_spc = params.escape;
|
bool suff_rm_leading_spc = params.escape;
|
||||||
if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) {
|
if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
|
||||||
params.input_suffix.erase(0, 1);
|
params.input_suffix.erase(0, 1);
|
||||||
suff_rm_leading_spc = false;
|
suff_rm_leading_spc = false;
|
||||||
}
|
}
|
||||||
|
@@ -1277,7 +1277,6 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
|||||||
".*weight",
|
".*weight",
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<uint8_t> read_data(512);
|
|
||||||
std::vector<uint8_t> work(512);
|
std::vector<uint8_t> work(512);
|
||||||
std::vector<float> conv_buf(512);
|
std::vector<float> conv_buf(512);
|
||||||
std::vector<int64_t> hist_all(1 << 4, 0);
|
std::vector<int64_t> hist_all(1 << 4, 0);
|
||||||
|
@@ -681,7 +681,7 @@ struct llama_server_context
|
|||||||
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
|
||||||
size_t end_prefix = pos;
|
size_t end_prefix = pos;
|
||||||
pos += pattern.length();
|
pos += pattern.length();
|
||||||
size_t end_pos = prompt.find("]", pos);
|
size_t end_pos = prompt.find(']', pos);
|
||||||
if (end_pos != std::string::npos)
|
if (end_pos != std::string::npos)
|
||||||
{
|
{
|
||||||
std::string image_id = prompt.substr(pos, end_pos - pos);
|
std::string image_id = prompt.substr(pos, end_pos - pos);
|
||||||
|
@@ -243,7 +243,6 @@ int main(int argc, char** argv) {
|
|||||||
if (useQ4_1) q41.resize(n4);
|
if (useQ4_1) q41.resize(n4);
|
||||||
else q40.resize(n4);
|
else q40.resize(n4);
|
||||||
std::vector<block_q8_0> q8(n8);
|
std::vector<block_q8_0> q8(n8);
|
||||||
std::vector<int64_t> H(16, 0);
|
|
||||||
double sumt = 0, sumt2 = 0, maxt = 0;
|
double sumt = 0, sumt2 = 0, maxt = 0;
|
||||||
double sumqt = 0, sumqt2 = 0, maxqt = 0;
|
double sumqt = 0, sumqt2 = 0, maxqt = 0;
|
||||||
double sum = 0, sumq = 0, exactSum = 0;
|
double sum = 0, sumq = 0, exactSum = 0;
|
||||||
|
@@ -102,7 +102,6 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||||||
} else if (t->type == GGML_TYPE_I8) {
|
} else if (t->type == GGML_TYPE_I8) {
|
||||||
tv.push_back((float)*(int8_t *) &buf[i]);
|
tv.push_back((float)*(int8_t *) &buf[i]);
|
||||||
} else if (quantized) {
|
} else if (quantized) {
|
||||||
std::vector<float> vq(ggml_blck_size(t->type));
|
|
||||||
tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
|
tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
|
||||||
tv.insert(tv.end(), vq.begin(), vq.end());
|
tv.insert(tv.end(), vq.begin(), vq.end());
|
||||||
} else {
|
} else {
|
||||||
|
@@ -190,7 +190,6 @@ int main()
|
|||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::vector<const llama_grammar_element *>> next_stacks;
|
|
||||||
std::vector<llama_grammar_candidate> next_candidates;
|
std::vector<llama_grammar_candidate> next_candidates;
|
||||||
next_candidates.resize(24);
|
next_candidates.resize(24);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user