mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
Fix grammar-based sampling issue in server (#2566)
This commit is contained in:
parent
916a9acdd0
commit
1638757767
@ -196,6 +196,7 @@ struct llama_server_context
|
|||||||
llama_context *ctx = nullptr;
|
llama_context *ctx = nullptr;
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
|
|
||||||
|
grammar_parser::parse_state parsed_grammar;
|
||||||
llama_grammar *grammar = nullptr;
|
llama_grammar *grammar = nullptr;
|
||||||
|
|
||||||
bool truncated = false;
|
bool truncated = false;
|
||||||
@ -241,10 +242,13 @@ struct llama_server_context
|
|||||||
stopped_limit = false;
|
stopped_limit = false;
|
||||||
stopping_word = "";
|
stopping_word = "";
|
||||||
multibyte_pending = 0;
|
multibyte_pending = 0;
|
||||||
grammar = nullptr;
|
|
||||||
|
|
||||||
n_remain = 0;
|
n_remain = 0;
|
||||||
n_past = 0;
|
n_past = 0;
|
||||||
|
|
||||||
|
if (grammar != nullptr) {
|
||||||
|
llama_grammar_free(grammar);
|
||||||
|
grammar = nullptr;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool loadModel(const gpt_params &params_)
|
bool loadModel(const gpt_params &params_)
|
||||||
@ -265,8 +269,6 @@ struct llama_server_context
|
|||||||
bool loadGrammar()
|
bool loadGrammar()
|
||||||
{
|
{
|
||||||
if (!params.grammar.empty()) {
|
if (!params.grammar.empty()) {
|
||||||
grammar_parser::parse_state parsed_grammar;
|
|
||||||
|
|
||||||
parsed_grammar = grammar_parser::parse(params.grammar.c_str());
|
parsed_grammar = grammar_parser::parse(params.grammar.c_str());
|
||||||
// will be empty (default) if there are parse errors
|
// will be empty (default) if there are parse errors
|
||||||
if (parsed_grammar.rules.empty()) {
|
if (parsed_grammar.rules.empty()) {
|
||||||
|
Loading…
Reference in New Issue
Block a user