diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index 65ed29fb5..58307d604 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -15,7 +15,8 @@
       min-width: 300px;
       line-height: 1.2;
       margin: 0 auto;
-      padding: 0 0.5em; }
+      padding: 0 0.5em;
+    }
 
     #container {
       margin: 0em auto;
@@ -50,6 +51,7 @@
       background-color: #161616;
       color: #d6d6d6;
       margin-left: 20%;
+      border-bottom-right-radius: 0;
     }
 
     .asst {
@@ -57,6 +59,7 @@
       color: #161616;
       text-align: left;
       margin-right: 20%;
+      border-top-left-radius: 0;
     }
 
     .typing {
@@ -113,7 +116,7 @@
 
     fieldset label {
       margin: 0.5em 0;
-      display: block;
+      /*display: block;*/
     }
 
     header, footer {
@@ -135,22 +138,30 @@
 
     const session = signal({
       system: "A chat between a curious user and a pirate.",
+      system_cfg: "A chat between a curious user and an artificial intelligence assistant.",
       message: "{{system}}\n\n### Instruction:\n{{user}}\n\n### Response:\n{{assistant}}",
       stop: ["###"],
       transcript: [],
       type: "chat",
-      char: "llama",
-      user: "User",
-      fullprompt: "",
+      fullprompt: "", // debug
     })
 
     const params = signal({
       n_predict: 400,
-      temperature: 0.7,
-      repeat_last_n: 256,
-      repeat_penalty: 1.18,
       top_k: 40,
-      top_p: 0.5,
+      top_p: 0.95,
+      tfs_z: 1.0,
+      typical_p: 1.0,
+      temperature: 0.7,
+      repeat_penalty: 1.18,
+      frequency_penalty: 0.0,
+      presence_penalty: 0.0,
+      repeat_last_n: 256,
+      mirostat: 0,
+      mirostat_tau: 5.0,
+      mirostat_eta: 0.1,
+      cfg_scale: 1.0,
+      penalize_nl: true,
    })
 
    const llamaStats = signal(null)
@@ -187,12 +198,19 @@
       const system = history.length == 0 ? session.value.system : ""
       transcriptUpdate([...history, { system, user: msg, assistant: "" }])
 
-      const prompt = session.value.transcript.map(t => template(session.value.message, t)).join("").trimEnd()
-      session.value = { ...session.value, fullprompt: prompt } // debug
+      const prompt = session.value.transcript.map(t =>
+        template(session.value.message, t)).join("").trimEnd()
+
+      const cfg_negative_prompt = params.value.cfg_scale > 1 ? session.value.transcript.map(t =>
+        template(session.value.message, { ...t, system: session.value.system_cfg })
+      ).join("").trimEnd() : ""
+      session.value = { ...session.value, fullprompt: cfg_negative_prompt } // debug
+
       let currentMessage = ''
       const llamaParams = {
         ...params.value,
+        cfg_negative_prompt,
         stop: session.stop,
       }
@@ -284,6 +302,18 @@
         ${JSON.stringify(session.value.transcript, null, 2)}` // debug
     }
 
+    const ParamSlider = ({param, min, max, step, children}) => {
+      const updateParamsFloat = (el) => params.value = { ...params.value, [param]: parseFloat(el.target.value) }
+      return html`
+
         ${JSON.stringify(session.value.stop)/* debug */}
-
         ${JSON.stringify(params.value, null, 2)/*debug*/}`
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2b6d97ae5..76013f75a 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -398,7 +398,7 @@ struct llama_server_context {
 
         evaluator.evaluate(params.n_threads, params.n_batch);
         if (cfg_enabled) {
-        evaluator_guidance.evaluate(params.n_threads, params.n_batch);
+            evaluator_guidance.evaluate(params.n_threads, params.n_batch);
         }
 
         if (params.n_predict == 0) {
@@ -1067,7 +1067,7 @@ int main(int argc, char ** argv) {
            llama.loadPrompt();
            llama.beginCompletion();
 
-           if (llama.params.cfg_negative_prompt.size() > 0) {
+           if (llama.params.cfg_scale > 1.0f && llama.params.cfg_negative_prompt.size() > 0) {
                llama.cfg_enabled = true;
                llama.loadGuidancePrompt();
            }
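
Below is a minimal usage sketch, separate from the patch itself, of how a client could exercise the new guidance fields once the change is applied. It assumes the server runs on localhost:8080 and exposes the same /completion endpoint the web UI posts to; the cfg_scale and cfg_negative_prompt field names are taken from the llamaParams object and the server.cpp check above, and guidance only activates when cfg_scale is greater than 1.0 and a non-empty negative prompt is supplied.

    // Hypothetical client call; the port and endpoint path are assumptions,
    // the cfg_* fields mirror what index.html now sends in llamaParams.
    fetch("http://localhost:8080/completion", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
            prompt: "A chat between a curious user and a pirate.\n\n### Instruction:\nHello!\n\n### Response:\n",
            n_predict: 64,
            stop: ["###"],
            cfg_scale: 1.5, // > 1.0, so the server sets cfg_enabled
            cfg_negative_prompt: "A chat between a curious user and an artificial intelligence assistant.\n\n### Instruction:\nHello!\n\n### Response:\n",
        }),
    }).then(r => r.json()).then(console.log)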