mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
server : display token probabilities in the UI (#2489)
* server : add n_probs param in chat UI * server : keep message data array & show in probabilites component * server : add simple popover component * server : fix completion_probabilities undefined if not set n_probs * server : implement Probabilites * server : handle bytes * server : make n_probs max to 10 for easy scroll * server : adjust for dark/light mode * server : Fix regenerated prompt * server : update index.html.hpp * server : convert prob to percentage + show original value as div title * server : fix Probabilites not used if included empty str * server : skip byte pair in display probabilites * server : remove array check of completion_probabilities in messages * skip empty array or byte pair (> 1) in Probabilites * generate index.html.hpp * fix incorrect prob convert if the str is already a known token * use final response to show probabilities on stop * revert unnecessary change * correct probabilites usage * remove unused function * always send partial response for get correct probs of last to_send * fix typo * fix content of format_final_response * refactor probs render & make pColor transparent if not found * send empty string when got stop_pos in partial * avoid unnecessary empty data event & send rest of partial tokens on stop * use <br /> for new line * skip -1 tok in loop to avoid send '' on end * trim last new lines on stop * revert unnecessary change
This commit is contained in:
parent
5439a0ab57
commit
29674ab4e8
File diff suppressed because it is too large
Load Diff
@ -102,6 +102,17 @@
|
|||||||
padding: 0.5em;
|
padding: 0.5em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.prob-set {
|
||||||
|
padding: 0.3em;
|
||||||
|
border-bottom: 1px solid #ccc;
|
||||||
|
}
|
||||||
|
|
||||||
|
.popover-content {
|
||||||
|
position: absolute;
|
||||||
|
background-color: white;
|
||||||
|
padding: 0.2em;
|
||||||
|
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
textarea {
|
textarea {
|
||||||
padding: 5px;
|
padding: 5px;
|
||||||
@ -133,11 +144,17 @@
|
|||||||
font-size: 80%;
|
font-size: 80%;
|
||||||
color: #888;
|
color: #888;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@media (prefers-color-scheme: dark) {
|
||||||
|
.popover-content {
|
||||||
|
background-color: black;
|
||||||
|
}
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
<script type="module">
|
<script type="module">
|
||||||
import {
|
import {
|
||||||
html, h, signal, effect, computed, render, useSignal, useEffect, useRef
|
html, h, signal, effect, computed, render, useSignal, useEffect, useRef, Component
|
||||||
} from '/index.js';
|
} from '/index.js';
|
||||||
|
|
||||||
import { llama } from '/completion.js';
|
import { llama } from '/completion.js';
|
||||||
@ -168,6 +185,7 @@
|
|||||||
mirostat_tau: 5, // target entropy
|
mirostat_tau: 5, // target entropy
|
||||||
mirostat_eta: 0.1, // learning rate
|
mirostat_eta: 0.1, // learning rate
|
||||||
grammar: '',
|
grammar: '',
|
||||||
|
n_probs: 0, // no completion_probabilities
|
||||||
})
|
})
|
||||||
|
|
||||||
/* START: Support for storing prompt templates and parameters in borwser LocalStorage */
|
/* START: Support for storing prompt templates and parameters in borwser LocalStorage */
|
||||||
@ -334,10 +352,21 @@
|
|||||||
|
|
||||||
const prompt = template(session.value.template, {
|
const prompt = template(session.value.template, {
|
||||||
message: msg,
|
message: msg,
|
||||||
history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"),
|
history: session.value.transcript.flatMap(
|
||||||
|
([name, data]) =>
|
||||||
|
template(
|
||||||
|
session.value.historyTemplate,
|
||||||
|
{
|
||||||
|
name,
|
||||||
|
message: Array.isArray(data) ?
|
||||||
|
data.map(msg => msg.content).join('').replace(/^\s/, '') :
|
||||||
|
data,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
).join("\n"),
|
||||||
});
|
});
|
||||||
|
|
||||||
let currentMessage = '';
|
const currentMessages = [];
|
||||||
const history = session.value.transcript
|
const history = session.value.transcript
|
||||||
|
|
||||||
const llamaParams = {
|
const llamaParams = {
|
||||||
@ -347,15 +376,19 @@
|
|||||||
|
|
||||||
for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
|
for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
|
||||||
const data = chunk.data;
|
const data = chunk.data;
|
||||||
currentMessage += data.content;
|
|
||||||
|
|
||||||
// remove leading whitespace
|
|
||||||
currentMessage = currentMessage.replace(/^\s+/, "")
|
|
||||||
|
|
||||||
transcriptUpdate([...history, ["{{char}}", currentMessage]])
|
|
||||||
|
|
||||||
if (data.stop) {
|
if (data.stop) {
|
||||||
console.log("Completion finished: '", currentMessage, "', summary: ", data);
|
while (
|
||||||
|
currentMessages.length > 0 &&
|
||||||
|
currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
|
||||||
|
) {
|
||||||
|
currentMessages.pop();
|
||||||
|
}
|
||||||
|
transcriptUpdate([...history, ["{{char}}", currentMessages]])
|
||||||
|
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
|
||||||
|
} else {
|
||||||
|
currentMessages.push(data);
|
||||||
|
transcriptUpdate([...history, ["{{char}}", currentMessages]])
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data.timings) {
|
if (data.timings) {
|
||||||
@ -420,8 +453,18 @@
|
|||||||
}
|
}
|
||||||
}, [messages])
|
}, [messages])
|
||||||
|
|
||||||
const chatLine = ([user, msg]) => {
|
const chatLine = ([user, data], index) => {
|
||||||
return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdownish} text=${template(msg)} /></p>`
|
let message
|
||||||
|
const isArrayMessage = Array.isArray(data)
|
||||||
|
if (params.value.n_probs > 0 && isArrayMessage) {
|
||||||
|
message = html`<${Probabilities} data=${data} />`
|
||||||
|
} else {
|
||||||
|
const text = isArrayMessage ?
|
||||||
|
data.map(msg => msg.content).join('').replace(/^\s+/, '') :
|
||||||
|
data;
|
||||||
|
message = html`<${Markdownish} text=${template(text)} />`
|
||||||
|
}
|
||||||
|
return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
|
||||||
};
|
};
|
||||||
|
|
||||||
return html`
|
return html`
|
||||||
@ -568,10 +611,71 @@
|
|||||||
${FloatField({label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau})}
|
${FloatField({label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau})}
|
||||||
${FloatField({label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta})}
|
${FloatField({label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta})}
|
||||||
</fieldset>
|
</fieldset>
|
||||||
|
<fieldset>
|
||||||
|
${IntField({label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs})}
|
||||||
|
</fieldset>
|
||||||
</details>
|
</details>
|
||||||
</form>
|
</form>
|
||||||
`
|
`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const probColor = (p) => {
|
||||||
|
const r = Math.floor(192 * (1 - p));
|
||||||
|
const g = Math.floor(192 * p);
|
||||||
|
return `rgba(${r},${g},0,0.3)`;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Probabilities = (params) => {
|
||||||
|
return params.data.map(msg => {
|
||||||
|
const { completion_probabilities } = msg;
|
||||||
|
if (
|
||||||
|
!completion_probabilities ||
|
||||||
|
completion_probabilities.length === 0
|
||||||
|
) return msg.content
|
||||||
|
|
||||||
|
if (completion_probabilities.length > 1) {
|
||||||
|
// Not for byte pair
|
||||||
|
if (completion_probabilities[0].content.startsWith('byte: \\')) return msg.content
|
||||||
|
|
||||||
|
const splitData = completion_probabilities.map(prob => ({
|
||||||
|
content: prob.content,
|
||||||
|
completion_probabilities: [prob]
|
||||||
|
}))
|
||||||
|
return html`<${Probabilities} data=${splitData} />`
|
||||||
|
}
|
||||||
|
|
||||||
|
const { probs, content } = completion_probabilities[0]
|
||||||
|
const found = probs.find(p => p.tok_str === msg.content)
|
||||||
|
const pColor = found ? probColor(found.prob) : 'transparent'
|
||||||
|
|
||||||
|
const popoverChildren = html`
|
||||||
|
<div class="prob-set">
|
||||||
|
${probs.map((p, index) => {
|
||||||
|
return html`
|
||||||
|
<div
|
||||||
|
key=${index}
|
||||||
|
title=${`prob: ${p.prob}`}
|
||||||
|
style=${{
|
||||||
|
padding: '0.3em',
|
||||||
|
backgroundColor: p.tok_str === content ? probColor(p.prob) : 'transparent'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<span>${p.tok_str}: </span>
|
||||||
|
<span>${Math.floor(p.prob * 100)}%</span>
|
||||||
|
</div>
|
||||||
|
`
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
`
|
||||||
|
|
||||||
|
return html`
|
||||||
|
<${Popover} style=${{ backgroundColor: pColor }} popoverChildren=${popoverChildren}>
|
||||||
|
${msg.content.match(/\n/gim) ? html`<br />` : msg.content}
|
||||||
|
</>
|
||||||
|
`
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// poor mans markdown replacement
|
// poor mans markdown replacement
|
||||||
const Markdownish = (params) => {
|
const Markdownish = (params) => {
|
||||||
const md = params.text
|
const md = params.text
|
||||||
@ -600,10 +704,121 @@
|
|||||||
`
|
`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// simple popover impl
|
||||||
|
const Popover = (props) => {
|
||||||
|
const isOpen = useSignal(false);
|
||||||
|
const position = useSignal({ top: '0px', left: '0px' });
|
||||||
|
const buttonRef = useRef(null);
|
||||||
|
const popoverRef = useRef(null);
|
||||||
|
|
||||||
|
const togglePopover = () => {
|
||||||
|
if (buttonRef.current) {
|
||||||
|
const rect = buttonRef.current.getBoundingClientRect();
|
||||||
|
position.value = {
|
||||||
|
top: `${rect.bottom + window.scrollY}px`,
|
||||||
|
left: `${rect.left + window.scrollX}px`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
isOpen.value = !isOpen.value;
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleClickOutside = (event) => {
|
||||||
|
if (popoverRef.current && !popoverRef.current.contains(event.target) && !buttonRef.current.contains(event.target)) {
|
||||||
|
isOpen.value = false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
document.addEventListener('mousedown', handleClickOutside);
|
||||||
|
return () => {
|
||||||
|
document.removeEventListener('mousedown', handleClickOutside);
|
||||||
|
};
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
return html`
|
||||||
|
<span style=${props.style} ref=${buttonRef} onClick=${togglePopover}>${props.children}</span>
|
||||||
|
${isOpen.value && html`
|
||||||
|
<${Portal} into="#portal">
|
||||||
|
<div
|
||||||
|
ref=${popoverRef}
|
||||||
|
class="popover-content"
|
||||||
|
style=${{
|
||||||
|
top: position.value.top,
|
||||||
|
left: position.value.left,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
${props.popoverChildren}
|
||||||
|
</div>
|
||||||
|
</${Portal}>
|
||||||
|
`}
|
||||||
|
`;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Source: preact-portal (https://github.com/developit/preact-portal/blob/master/src/preact-portal.js)
|
||||||
|
/** Redirect rendering of descendants into the given CSS selector */
|
||||||
|
class Portal extends Component {
|
||||||
|
componentDidUpdate(props) {
|
||||||
|
for (let i in props) {
|
||||||
|
if (props[i] !== this.props[i]) {
|
||||||
|
return setTimeout(this.renderLayer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
componentDidMount() {
|
||||||
|
this.isMounted = true;
|
||||||
|
this.renderLayer = this.renderLayer.bind(this);
|
||||||
|
this.renderLayer();
|
||||||
|
}
|
||||||
|
|
||||||
|
componentWillUnmount() {
|
||||||
|
this.renderLayer(false);
|
||||||
|
this.isMounted = false;
|
||||||
|
if (this.remote && this.remote.parentNode) this.remote.parentNode.removeChild(this.remote);
|
||||||
|
}
|
||||||
|
|
||||||
|
findNode(node) {
|
||||||
|
return typeof node === 'string' ? document.querySelector(node) : node;
|
||||||
|
}
|
||||||
|
|
||||||
|
renderLayer(show = true) {
|
||||||
|
if (!this.isMounted) return;
|
||||||
|
|
||||||
|
// clean up old node if moving bases:
|
||||||
|
if (this.props.into !== this.intoPointer) {
|
||||||
|
this.intoPointer = this.props.into;
|
||||||
|
if (this.into && this.remote) {
|
||||||
|
this.remote = render(html`<${PortalProxy} />`, this.into, this.remote);
|
||||||
|
}
|
||||||
|
this.into = this.findNode(this.props.into);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.remote = render(html`
|
||||||
|
<${PortalProxy} context=${this.context}>
|
||||||
|
${show && this.props.children || null}
|
||||||
|
</${PortalProxy}>
|
||||||
|
`, this.into, this.remote);
|
||||||
|
}
|
||||||
|
|
||||||
|
render() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// high-order component that renders its first child if it exists.
|
||||||
|
// used as a conditional rendering proxy.
|
||||||
|
class PortalProxy extends Component {
|
||||||
|
getChildContext() {
|
||||||
|
return this.props.context;
|
||||||
|
}
|
||||||
|
render({ children }) {
|
||||||
|
return children || null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
function App(props) {
|
function App(props) {
|
||||||
|
|
||||||
return html`
|
return html`
|
||||||
<div id="container">
|
<div>
|
||||||
<header>
|
<header>
|
||||||
<h1>llama.cpp</h1>
|
<h1>llama.cpp</h1>
|
||||||
</header>
|
</header>
|
||||||
@ -624,11 +839,13 @@
|
|||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
render(h(App), document.body);
|
render(h(App), document.querySelector('#container'));
|
||||||
</script>
|
</script>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
|
<div id="container"></div>
|
||||||
|
<div id="portal"></div>
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
@ -124,8 +124,9 @@ static void server_log(const char *level, const char *function, int line,
|
|||||||
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
||||||
{
|
{
|
||||||
std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
|
std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
|
||||||
// if first bit is 1, meaning it's a partial character
|
// if the size is 1 and first bit is 1, meaning it's a partial character
|
||||||
if (out.size() > 0 && (out[0] & 0x80) == 0x80)
|
// (size > 1 meaning it's already a known token)
|
||||||
|
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << std::hex << (out[0] & 0xff);
|
ss << std::hex << (out[0] & 0xff);
|
||||||
@ -1321,59 +1322,86 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
while (llama.has_next_token) {
|
while (llama.has_next_token) {
|
||||||
const completion_token_output token_with_probs = llama.doCompletion();
|
const completion_token_output token_with_probs = llama.doCompletion();
|
||||||
const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(llama.ctx, token_with_probs.tok);
|
if (token_with_probs.tok == -1 || llama.multibyte_pending > 0) {
|
||||||
if (llama.multibyte_pending > 0) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
const std::string token_text = llama_token_to_str(llama.ctx, token_with_probs.tok);
|
||||||
|
|
||||||
size_t pos = std::min(sent_count, llama.generated_text.size());
|
size_t pos = std::min(sent_count, llama.generated_text.size());
|
||||||
|
|
||||||
const std::string str_test = llama.generated_text.substr(pos);
|
const std::string str_test = llama.generated_text.substr(pos);
|
||||||
|
bool is_stop_full = false;
|
||||||
size_t stop_pos =
|
size_t stop_pos =
|
||||||
llama.findStoppingStrings(str_test, token_text.size(), STOP_FULL);
|
llama.findStoppingStrings(str_test, token_text.size(), STOP_FULL);
|
||||||
if (stop_pos != std::string::npos) {
|
if (stop_pos != std::string::npos) {
|
||||||
|
is_stop_full = true;
|
||||||
llama.generated_text.erase(
|
llama.generated_text.erase(
|
||||||
llama.generated_text.begin() + pos + stop_pos,
|
llama.generated_text.begin() + pos + stop_pos,
|
||||||
llama.generated_text.end());
|
llama.generated_text.end());
|
||||||
pos = std::min(sent_count, llama.generated_text.size());
|
pos = std::min(sent_count, llama.generated_text.size());
|
||||||
} else {
|
} else {
|
||||||
|
is_stop_full = false;
|
||||||
stop_pos = llama.findStoppingStrings(str_test, token_text.size(),
|
stop_pos = llama.findStoppingStrings(str_test, token_text.size(),
|
||||||
STOP_PARTIAL);
|
STOP_PARTIAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string to_send = llama.generated_text.substr(pos, stop_pos);
|
if (
|
||||||
sent_count += to_send.size();
|
stop_pos == std::string::npos ||
|
||||||
|
// Send rest of the text if we are at the end of the generation
|
||||||
|
(!llama.has_next_token && !is_stop_full && stop_pos > 0)
|
||||||
|
) {
|
||||||
|
const std::string to_send = llama.generated_text.substr(pos, std::string::npos);
|
||||||
|
|
||||||
std::vector<completion_token_output> probs_output = {};
|
sent_count += to_send.size();
|
||||||
|
|
||||||
if (llama.params.n_probs > 0) {
|
std::vector<completion_token_output> probs_output = {};
|
||||||
const std::vector<llama_token> to_send_toks = llama_tokenize(llama.ctx, to_send, false);
|
|
||||||
size_t probs_pos = std::min(sent_token_probs_index, llama.generated_token_probs.size());
|
if (llama.params.n_probs > 0) {
|
||||||
size_t probs_stop_pos = std::min(sent_token_probs_index + to_send_toks.size(), llama.generated_token_probs.size());
|
const std::vector<llama_token> to_send_toks = llama_tokenize(llama.ctx, to_send, false);
|
||||||
if (probs_pos < probs_stop_pos) {
|
size_t probs_pos = std::min(sent_token_probs_index, llama.generated_token_probs.size());
|
||||||
probs_output = std::vector<completion_token_output>(llama.generated_token_probs.begin() + probs_pos, llama.generated_token_probs.begin() + probs_stop_pos);
|
size_t probs_stop_pos = std::min(sent_token_probs_index + to_send_toks.size(), llama.generated_token_probs.size());
|
||||||
|
if (probs_pos < probs_stop_pos) {
|
||||||
|
probs_output = std::vector<completion_token_output>(llama.generated_token_probs.begin() + probs_pos, llama.generated_token_probs.begin() + probs_stop_pos);
|
||||||
|
}
|
||||||
|
sent_token_probs_index = probs_stop_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
const json data = format_partial_response(llama, to_send, probs_output);
|
||||||
|
|
||||||
|
const std::string str =
|
||||||
|
"data: " +
|
||||||
|
data.dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||||
|
"\n\n";
|
||||||
|
|
||||||
|
LOG_VERBOSE("data stream", {
|
||||||
|
{ "to_send", str }
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!sink.write(str.data(), str.size())) {
|
||||||
|
LOG_VERBOSE("stream closed", {});
|
||||||
|
llama_print_timings(llama.ctx);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
sent_token_probs_index = probs_stop_pos;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const json data = llama.has_next_token
|
if (!llama.has_next_token) {
|
||||||
? format_partial_response(llama, to_send, probs_output)
|
// Generation is done, send extra information.
|
||||||
// Generation is done, send extra information.
|
const json data = format_final_response(llama, "", llama.generated_token_probs);
|
||||||
: format_final_response(llama, to_send, llama.generated_token_probs);
|
|
||||||
|
|
||||||
const std::string str =
|
const std::string str =
|
||||||
"data: " +
|
"data: " +
|
||||||
data.dump(-1, ' ', false, json::error_handler_t::replace) +
|
data.dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||||
"\n\n";
|
"\n\n";
|
||||||
|
|
||||||
LOG_VERBOSE("data stream", {
|
LOG_VERBOSE("data stream", {
|
||||||
{ "to_send", str }
|
{ "to_send", str }
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!sink.write(str.data(), str.size())) {
|
if (!sink.write(str.data(), str.size())) {
|
||||||
LOG_VERBOSE("stream closed", {});
|
LOG_VERBOSE("stream closed", {});
|
||||||
llama_print_timings(llama.ctx);
|
llama_print_timings(llama.ctx);
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user