mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-01 06:14:35 +00:00
452 lines
14 KiB
HTML
452 lines
14 KiB
HTML
<html>
|
|
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
|
|
<title>llama.cpp - chat</title>
|
|
|
|
<style>
|
|
body {
|
|
background-color: #fff;
|
|
color: #000;
|
|
font-family: system-ui;
|
|
font-size: 90%;
|
|
max-width: 600px;
|
|
min-width: 300px;
|
|
line-height: 1.2;
|
|
margin: 0 auto;
|
|
padding: 0 0.5em;
|
|
}
|
|
|
|
#container {
|
|
margin: 0em auto;
|
|
display: flex;
|
|
flex-direction: column;
|
|
justify-content: space-between;
|
|
height: 100%;
|
|
}
|
|
|
|
main {
|
|
margin: 3px;
|
|
display: flex;
|
|
flex-direction: column;
|
|
justify-content: space-between;
|
|
gap: 1em;
|
|
|
|
flex-grow: 1;
|
|
overflow-y: auto;
|
|
|
|
border: 1px solid #ccc;
|
|
border-radius: 5px;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
.bubble {
|
|
border: 1px solid;
|
|
border-radius: 10px;
|
|
padding: 5px;
|
|
}
|
|
|
|
.user {
|
|
background-color: #161616;
|
|
color: #d6d6d6;
|
|
margin-left: 20%;
|
|
border-bottom-right-radius: 0;
|
|
}
|
|
|
|
.asst {
|
|
background-color: #d6d6d6;
|
|
color: #161616;
|
|
text-align: left;
|
|
margin-right: 20%;
|
|
border-top-left-radius: 0;
|
|
}
|
|
|
|
.typing {
|
|
color: #888;
|
|
text-align: left;
|
|
font-size: 120%;
|
|
}
|
|
|
|
p {
|
|
overflow-wrap: break-word;
|
|
word-wrap: break-word;
|
|
hyphens: auto;
|
|
margin-top: 0.5em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
|
|
#write form {
|
|
margin: 1em 0 0 0;
|
|
display: flex;
|
|
flex-direction: column;
|
|
gap: 0.5em;
|
|
align-items: stretch;
|
|
}
|
|
|
|
.right {
|
|
display: flex;
|
|
flex-direction: row;
|
|
gap: 0.5em;
|
|
justify-content: flex-end;
|
|
}
|
|
|
|
fieldset {
|
|
border: none;
|
|
padding: 0;
|
|
margin: 0;
|
|
}
|
|
|
|
textarea {
|
|
padding: 5px;
|
|
flex-grow: 1;
|
|
width: 100%;
|
|
}
|
|
|
|
pre code {
|
|
display: block;
|
|
background-color: #222;
|
|
color: #ddd;
|
|
}
|
|
code {
|
|
font-family: monospace;
|
|
padding: 0.1em 0.3em;
|
|
border-radius: 3px;
|
|
}
|
|
|
|
fieldset label {
|
|
margin: 0.5em 0;
|
|
/*display: block;*/
|
|
}
|
|
|
|
header, footer {
|
|
text-align: center;
|
|
}
|
|
|
|
footer {
|
|
font-size: 80%;
|
|
color: #888;
|
|
}
|
|
</style>
|
|
|
|
<script type="module">
|
|
import {
|
|
html, h, signal, effect, computed, render, useSignal, useEffect, useRef
|
|
} from '/index.js'
|
|
|
|
import { llama } from '/completion.js'
|
|
|
|
// Chat session state shared by every component in this module.
const session = signal({
  // system prompt; chat() attaches it to the first turn of a transcript only
  system: "A chat between a curious user and a pirate.",
  // system prompt substituted when building the CFG negative prompt
  system_cfg: "A chat between a curious user and an artificial intelligence assistant.",
  // per-turn template; {{key}} placeholders are expanded by template()
  message: "{{system}}\n\n### Instruction:\n{{user}}\n\n### Response:\n{{assistant}}",
  // stop strings, editable in ConfigForm
  stop: ["###"],
  // list of { system, user, assistant } turns
  transcript: [],
  type: "chat",
  // debug: last CFG negative prompt built by chat()
  fullprompt: "",
})
|
|
|
|
// Generation parameters. chat() spreads this object into the options it
// passes to llama(); ConfigForm / ParamSlider edit the individual fields.
const params = signal({
  n_predict: 400,

  // sampling
  top_k: 40,
  top_p: 0.95,
  tfs_z: 1.0,
  typical_p: 1.0,
  temperature: 0.7,

  // repetition penalties
  repeat_penalty: 1.18,
  frequency_penalty: 0.0,
  presence_penalty: 0.0,
  repeat_last_n: 256,

  // mirostat (0 selects the plain "Temperature" radio option in ConfigForm)
  mirostat: 0,
  mirostat_tau: 5.0,
  mirostat_eta: 0.1,

  // a negative prompt is only built by chat() when cfg_scale > 1
  cfg_scale: 1.0,
  penalize_nl: true,
})
|
|
|
|
// Timings reported by the most recent completion chunk that carried any.
const llamaStats = signal(null)

// AbortController of the in-flight completion; null while nothing is running.
const controller = signal(null)

// NOTE: despite its name this is true while NO completion is running
// (controller is null/undefined). MessageInput uses it with that meaning:
// Send is disabled on `!generating.value`, Stop on `generating`.
const generating = computed(() => {
  const active = controller.value
  return active === null || active === undefined
})

// True once the transcript holds at least one turn.
const chatStarted = computed(() => {
  const { transcript } = session.value
  return transcript.length > 0
})
|
|
|
|
// Replace the session transcript, publishing a fresh session object so that
// signal subscribers are notified.
const transcriptUpdate = (transcript) => {
  const previous = session.value
  session.value = { ...previous, transcript }
}
|
|
|
|
// Simple template replace: expands every {{key}} in `str` with the matching
// session setting (optionally overridden by `extraSettings`). The looked-up
// value is itself expanded, but only against the plain session settings --
// `extraSettings` is not forwarded into the recursive call.
const template = (str, extraSettings) => {
  const settings = extraSettings
    ? { ...session.value, ...extraSettings }
    : session.value
  const expand = (_match, key) => template(settings[key])
  return String(str).replaceAll(/\{\{(.*?)\}\}/g, expand)
}
|
|
|
|
// Send a user message to the server and stream the reply into the transcript.
//
// msg: the user's message text.
//
// Does nothing (beyond logging) when a completion is already in flight.
// Otherwise it appends a pending turn to the transcript, builds the prompt
// (and, when cfg_scale > 1, a negative prompt) from the message template,
// and updates the last turn's `assistant` text as chunks arrive.
// Errors thrown by llama() propagate to the caller; `controller` is always
// released first so the UI cannot get stuck in the "already running" state.
const chat = async (msg) => {
  if (controller.value) {
    console.log('already running...')
    return
  }
  const ownController = new AbortController()
  controller.value = ownController

  try {
    const history = session.value.transcript
    // the system prompt is only attached to the first turn
    const system = history.length === 0 ? session.value.system : ""
    transcriptUpdate([...history, { system, user: msg, assistant: "" }])

    const prompt = session.value.transcript.map(t =>
      template(session.value.message, t)).join("").trimEnd()

    const cfg_negative_prompt = params.value.cfg_scale > 1 ? session.value.transcript.map(t =>
      template(session.value.message, { ...t, system: session.value.system_cfg })
    ).join("").trimEnd() : ""
    session.value = { ...session.value, fullprompt: cfg_negative_prompt } // debug

    let currentMessage = ''

    const llamaParams = {
      ...params.value,
      cfg_negative_prompt,
      // `session` is a signal: the stop strings live on its value, not on
      // the signal object itself
      stop: session.value.stop,
    }

    for await (const chunk of llama(prompt, llamaParams, { controller: ownController })) {
      const data = chunk.data
      currentMessage += data.content

      // remove leading whitespace
      currentMessage = currentMessage.replace(/^\s+/, "")

      transcriptUpdate([...history, { system, user: msg, assistant: currentMessage }])

      if (data.stop) {
        console.log("Completion finished: '", currentMessage, "', summary: ", data)
      }

      if (data.timings) {
        llamaStats.value = data.timings
      }
    }
  } finally {
    // Only release the slot if it is still ours: after an abort a newer
    // chat() call may already have installed its own controller.
    if (controller.value === ownController) {
      controller.value = null
    }
  }
}
|
|
|
|
// Message composer: a textarea plus Send / Stop / Reset buttons.
// Enter submits, Shift+Enter inserts a newline.
function MessageInput() {
  const message = useSignal("")

  // Abort the in-flight completion, if any. Also suppresses the default
  // action of the triggering event (form submit / newline insertion).
  const stop = (e) => {
    e.preventDefault()
    if (controller.value) {
      controller.value.abort()
      controller.value = null
    }
  }

  // Stop generation and clear the transcript.
  const reset = (e) => {
    stop(e)
    transcriptUpdate([])
  }

  // Stop any running generation, send the current text and clear the input.
  const submit = (e) => {
    stop(e)
    // chat() is async: attach a handler so a rejection is not left floating.
    // Aborts are user-initiated and expected; anything else is reported.
    chat(message.value).catch((err) => {
      if (err && err.name === 'AbortError') {
        return
      }
      console.error(err)
    })
    message.value = ""
  }

  const enterSubmits = (event) => {
    // `event.which` is deprecated; `event.key` is the supported equivalent
    if (event.key === "Enter" && !event.shiftKey) {
      submit(event)
    }
  }

  // NOTE: `generating` is true while idle (see its definition), hence the
  // seemingly inverted `disabled` expressions below.
  return html`
    <form onsubmit=${submit}>
      <div>
        <textarea rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
      </div>
      <div class="right">
        <button type="submit" disabled=${!generating.value} >Send</button>
        <button onclick=${stop} disabled=${generating}>Stop</button>
        <button onclick=${reset}>Reset</button>
      </div>
      <pre>${session.value.fullprompt/* debug */}</pre>
    </form>
  `
}
|
|
|
|
// Renders the transcript as chat bubbles and keeps the view scrolled to the
// bottom while the reader is already near the bottom.
const ChatLog = (props) => {
  const messages = session.value.transcript
  const container = useRef(null)

  useEffect(() => {
    // scroll to bottom (if needed)
    const el = container.current
    if (!el) {
      return
    }
    // only follow the output when within 300px of the bottom
    const visibleBottom = el.scrollTop + el.offsetHeight
    if (el.scrollHeight <= visibleBottom + 300) {
      el.scrollTo(0, el.scrollHeight)
    }
  }, [messages])

  // One transcript turn: optional system line, the user bubble, then either
  // the assistant bubble or a "typing" placeholder while it is still empty.
  const renderTurn = ({system, user, assistant}) => html`
    ${system !== "" && html`<p><em><${Markdownish} text=${system} /></em></p>`}
    <p class="user bubble"><${Markdownish} text=${user} /></p>
    ${assistant !== "" ?
      html`<p class="asst bubble"><${Markdownish} text=${assistant} /></p>` :
      html`<p class="typing">...</p>`}
  `

  return html`
    <section id="chat" ref=${container}>
      ${messages.map(renderTurn)}
    </section>
    <pre>${JSON.stringify(session.value.transcript, null, 2)}</pre>` // debug
}
|
|
|
|
// Range input bound to one numeric field of `params`.
//   param          - key in params.value to display and update
//   min, max, step - forwarded to the <input type="range">
//   children       - descriptive text rendered after the current value
const ParamSlider = ({param, min, max, step, children}) => {
  const current = params.value[param]
  const onSlide = (el) => params.value = { ...params.value, [param]: parseFloat(el.target.value) }

  return html`
    <div>
      <label for="${param}"><code>${param}</code></label>
      <input type="range" id="${param}" min="${min}" max="${max}" step="${step}" name="${param}" value="${current}" oninput=${onSlide} />
      <span>${current}</span>
      <span>${children}</span>
    </div>
  `
}
|
|
|
|
// Settings form shown before the chat starts: prompts, message template,
// stop strings and the generation parameters held in `params`.
const ConfigForm = (props) => {
  // copy a text field into the session setting named by the input's `name`
  const updateSession = (el) => session.value = { ...session.value, [el.target.name]: el.target.value }
  // copy a numeric field into the parameter named by the input's `name`
  const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }
  // add an empty stop-string row
  const appendArray = () => session.value = { ...session.value, stop: [...session.value.stop, ""] }
  // inputs are named "<setting>.<index>"; write the edited entry back and
  // drop entries that are now empty
  const updateArray = (el) => {
    const [name, index] = el.target.name.split(".")
    const position = Number(index)
    const newarr = session.value[name].map((v, i) => i === position ? el.target.value : v).filter(x => x !== "")
    session.value = { ...session.value, [name]: newarr }
  }

  // The textareas carry ids so that their <label for=...> actually targets them.
  return html`
    <form>
      <fieldset>
        <div>
          <label for="system">System prompt</label>
          <textarea id="system" name="system" value="${session.value.system}" rows=4 oninput=${updateSession}/>
        </div>

        <div>
          <label for="message">Message template</label>
          <textarea id="message" name="message" value="${session.value.message}" rows=7 oninput=${updateSession}/>
        </div>

        <div>
          <label for="stop">Stop strings</label>
          ${session.value.stop.map((stop, i) => html`
            <p><input type="text" name="stop.${i}" value="${stop}" oninput=${updateArray}/></p>
          `)}
          <input type="button" value="+" onclick=${appendArray} />

          <pre>${JSON.stringify(session.value.stop)/* debug */}</pre>
        </div>

        <${ParamSlider} min=1 max=10 step=0.1 param=cfg_scale>CFG scale<//>
        ${params.value.cfg_scale > 1 && html`
          <div>
            <label for="system_cfg">CFG System prompt</label>
            <textarea id="system_cfg" name="system_cfg" value="${session.value.system_cfg}" rows=4 oninput=${updateSession}/>
          </div>
        `}

        <${ParamSlider} min=1 max=1000 step=1 param=n_predict>Predict N tokens<//>
        <${ParamSlider} min=0 max=1000 step=1 param=repeat_last_n>Penalize last N tokens<//>
        ${params.value.repeat_last_n > 0 && html`
          <${ParamSlider} min=0 max=4 step=0.01 param=repeat_penalty>Penalize repeat sequence<//>
          <${ParamSlider} min=0 max=4 step=0.01 param=frequency_penalty>Penalize frequent tokens<//>
          <${ParamSlider} min=0 max=4 step=0.01 param=presence_penalty>Penalize tokens not present in prompt<//>
        `}
        <${ParamSlider} min=0 max=2 step=0.01 param=temperature>Temperature<//>
        ${params.value.temperature > 0 && html`
          <div>
            <input id=mirostat_0 type=radio name=mirostat checked=${params.value.mirostat == 0} value=0 oninput=${updateParamsFloat} />
            <label for=mirostat_0>Temperature</label>

            <input id=mirostat_1 type=radio name=mirostat checked=${params.value.mirostat == 1} value=1 oninput=${updateParamsFloat} />
            <label for=mirostat_1>Mirostat v1</label>

            <input id=mirostat_2 type=radio name=mirostat checked=${params.value.mirostat == 2} value=2 oninput=${updateParamsFloat} />
            <label for=mirostat_2>Mirostat v2</label>
          </div>

          ${params.value.mirostat == 0 && html`
            <${ParamSlider} min=1 max=1000 step=1 param=top_k>Top K<//>
            <${ParamSlider} min=0 max=1 step=0.01 param=tfs_z>Tail free sampling<//>
            <${ParamSlider} min=0 max=1 step=0.01 param=typical_p>Typical P<//>
            <${ParamSlider} min=0 max=1 step=0.01 param=top_p>Top P<//>
          `}
          ${params.value.mirostat > 0 && html`
            <${ParamSlider} min=0 max=1 step=0.01 param=mirostat_eta>Mirostat eta, learning rate<//>
            <${ParamSlider} min=0 max=1000 step=1 param=mirostat_tau>Mirostat tau, target entropy<//>
          `}
        `}
        <pre>${JSON.stringify(params.value, null, 2)/*debug*/}</pre>
      </fieldset>
    </form>
  `
}
|
|
// Poor man's markdown replacement.
//
// params.text: raw text (user input or model output) to render.
//
// Returns a <span> whose inner HTML is `text` with a handful of markdown
// constructs (headings, bold, emphasis, code spans/blocks, newlines) turned
// into tags. Because the result is injected with dangerouslySetInnerHTML,
// the HTML metacharacters of the input are escaped first, so that only the
// tags produced below can ever reach the DOM.
const Markdownish = (params) => {
  const escaped = String(params.text ?? "")
    .replace(/&/g, '&amp;') // must run first, or it would re-escape the entities below
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')

  const md = escaped
    .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
    .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
    .replace(/__(.*?)__/g, '<strong>$1</strong>')
    .replace(/\*(.*?)\*/g, '<em>$1</em>')
    .replace(/_(.*?)_/g, '<em>$1</em>')
    .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
    .replace(/`(.*?)`/g, '<code>$1</code>')
    .replace(/\n/gim, '<br />')
  return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`
}
|
|
|
|
// Footer line with the speed of the last completion; renders an empty
// <span> until timings have been stored in llamaStats.
const ModelGenerationInfo = (params) => {
  const stats = llamaStats.value
  if (!stats) {
    return html`<span/>`
  }

  const msPerToken = stats.predicted_per_token_ms.toFixed()
  const tokensPerSecond = stats.predicted_per_second.toFixed(2)
  return html`
    <span>
      ${msPerToken}ms per token, ${tokensPerSecond} tokens per second
    </span>
  `
}
|
|
|
|
// Root component: header, main pane, message composer and footer.
function App(props) {
  // the settings form is shown until the first message has been sent
  const MainPane = chatStarted.value ? ChatLog : ConfigForm

  return html`
    <div id="container">
      <header>
        <h1>llama.cpp</h1>
      </header>

      <main id="content">
        <${MainPane} />
      </main>

      <section id="write">
        <${MessageInput} />
      </section>

      <footer>
        <p><${ModelGenerationInfo} /></p>
        <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
      </footer>
    </div>
  `
}

// mount the application into the (initially empty) document body
render(h(App), document.body)
|
|
</script>
|
|
</head>
|
|
|
|
<body>
|
|
</body>
|
|
|
|
</html>
|