mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
ggml : fix rope + llama minor optimizations (#3560)
* Minor fixes and fixed memleak
* Using const auto references in range-based loop C++17
This commit is contained in:
parent
e78f3ef24a
commit
f439e506e8
@ -399,7 +399,7 @@ namespace grammar_parser {
|
|||||||
void print_grammar(FILE * file, const parse_state & state) {
|
void print_grammar(FILE * file, const parse_state & state) {
|
||||||
try {
|
try {
|
||||||
std::map<uint32_t, std::string> symbol_id_names;
|
std::map<uint32_t, std::string> symbol_id_names;
|
||||||
for (auto kv : state.symbol_ids) {
|
for (const auto & kv : state.symbol_ids) {
|
||||||
symbol_id_names[kv.second] = kv.first;
|
symbol_id_names[kv.second] = kv.first;
|
||||||
}
|
}
|
||||||
for (size_t i = 0, end = state.rules.size(); i < end; i++) {
|
for (size_t i = 0, end = state.rules.size(); i < end; i++) {
|
||||||
|
@ -1425,7 +1425,7 @@ void train_opt_callback(void * vdata, int accum_step, float * sched, bool * canc
|
|||||||
|
|
||||||
int impr_plot = -(int)(1 + (opt->loss_before - opt->loss_after) * 10.0f + 0.5f);
|
int impr_plot = -(int)(1 + (opt->loss_before - opt->loss_after) * 10.0f + 0.5f);
|
||||||
if (impr_plot > 0) impr_plot = 0;
|
if (impr_plot > 0) impr_plot = 0;
|
||||||
if (std::isnan(opt->loss_before) || std::isnan(opt->loss_before)) impr_plot = 0;
|
if (std::isnan(opt->loss_before) || std::isnan(opt->loss_after)) impr_plot = 0;
|
||||||
printf("%s: iter=%6d sample=%zu/%zu sched=%f loss=%f",
|
printf("%s: iter=%6d sample=%zu/%zu sched=%f loss=%f",
|
||||||
__func__, opt->iter, std::min(1+train->shuffle_next_sample, train->shuffle_sample_count), train->shuffle_sample_count,
|
__func__, opt->iter, std::min(1+train->shuffle_next_sample, train->shuffle_sample_count), train->shuffle_sample_count,
|
||||||
*sched, opt->loss_after);
|
*sched, opt->loss_after);
|
||||||
|
3
ggml.c
3
ggml.c
@ -13537,7 +13537,7 @@ static void ggml_compute_forward_rope_f16(
|
|||||||
dst_data[n_dims] = GGML_FP32_TO_FP16(x2*cos_block_theta - x3*sin_block_theta);
|
dst_data[n_dims] = GGML_FP32_TO_FP16(x2*cos_block_theta - x3*sin_block_theta);
|
||||||
dst_data[n_dims/2*3] = GGML_FP32_TO_FP16(x2*sin_block_theta + x3*cos_block_theta);
|
dst_data[n_dims/2*3] = GGML_FP32_TO_FP16(x2*sin_block_theta + x3*cos_block_theta);
|
||||||
}
|
}
|
||||||
} if (!is_neox) {
|
} else if (!is_neox) {
|
||||||
for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
|
for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
|
||||||
const float cos_theta = cosf(theta);
|
const float cos_theta = cosf(theta);
|
||||||
const float sin_theta = sinf(theta);
|
const float sin_theta = sinf(theta);
|
||||||
@ -19170,6 +19170,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|||||||
|
|
||||||
if (idx == -1) {
|
if (idx == -1) {
|
||||||
fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
|
fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
|
||||||
|
fclose(fout);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6324,7 +6324,6 @@ struct llm_tokenizer_bpe {
|
|||||||
llm_symbol sym;
|
llm_symbol sym;
|
||||||
size_t char_len = std::min(word.size() - offset, (size_t) ::utf8_len(word[offset]));
|
size_t char_len = std::min(word.size() - offset, (size_t) ::utf8_len(word[offset]));
|
||||||
sym.text = word.c_str() + offset;
|
sym.text = word.c_str() + offset;
|
||||||
sym.n = 1;
|
|
||||||
sym.n = char_len;
|
sym.n = char_len;
|
||||||
offset += sym.n;
|
offset += sym.n;
|
||||||
sym.prev = index - 1;
|
sym.prev = index - 1;
|
||||||
@ -7054,7 +7053,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
|
|||||||
std::vector<llama_grammar_candidate> rejects;
|
std::vector<llama_grammar_candidate> rejects;
|
||||||
|
|
||||||
if (stack.empty()) {
|
if (stack.empty()) {
|
||||||
for (auto tok : candidates) {
|
for (const auto & tok : candidates) {
|
||||||
if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
|
if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
|
||||||
rejects.push_back(tok);
|
rejects.push_back(tok);
|
||||||
}
|
}
|
||||||
@ -7065,7 +7064,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
|
|||||||
const llama_grammar_element * stack_pos = stack.back();
|
const llama_grammar_element * stack_pos = stack.back();
|
||||||
|
|
||||||
std::vector<llama_grammar_candidate> next_candidates;
|
std::vector<llama_grammar_candidate> next_candidates;
|
||||||
for (auto tok : candidates) {
|
for (const auto & tok : candidates) {
|
||||||
if (*tok.code_points == 0) {
|
if (*tok.code_points == 0) {
|
||||||
// reached end of full codepoints in token, reject iff it ended in a partial sequence
|
// reached end of full codepoints in token, reject iff it ended in a partial sequence
|
||||||
// that cannot satisfy this position in grammar
|
// that cannot satisfy this position in grammar
|
||||||
@ -7091,7 +7090,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
|
|||||||
llama_grammar_advance_stack(rules, stack_after, next_stacks);
|
llama_grammar_advance_stack(rules, stack_after, next_stacks);
|
||||||
|
|
||||||
auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
|
auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
|
||||||
for (auto tok : next_rejects) {
|
for (const auto & tok : next_rejects) {
|
||||||
rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
|
rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user