mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 20:14:29 +00:00
server: (web UI) Add samplers sequence customization (#10255)
Some checks failed
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-musa.Dockerfile platforms:linux/amd64 tag:full-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-musa.Dockerfile platforms:linux/amd64 tag:light-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-musa.Dockerfile platforms:linux/amd64 tag:server-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
Python Type-Check / pyright type-check (push) Waiting to run
update-flake-lock / lockfile (push) Has been cancelled
Some checks failed
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-musa.Dockerfile platforms:linux/amd64 tag:full-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-musa.Dockerfile platforms:linux/amd64 tag:light-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-musa.Dockerfile platforms:linux/amd64 tag:server-musa]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
Python Type-Check / pyright type-check (push) Waiting to run
update-flake-lock / lockfile (push) Has been cancelled
* Samplers sequence: simplified and input field.
* Removed unused function
* Modify and use `settings-modal-short-input`
* rename "name" --> "label"

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
parent
f245cc28d4
commit
bcdb7a2386
@ -212,6 +212,9 @@
|
|||||||
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
|
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
|
||||||
<summary class="collapse-title font-bold">Other sampler settings</summary>
|
<summary class="collapse-title font-bold">Other sampler settings</summary>
|
||||||
<div class="collapse-content">
|
<div class="collapse-content">
|
||||||
|
<!-- Samplers queue -->
|
||||||
|
<settings-modal-short-input label="Samplers queue" :config-key="'samplers'" :config-default="configDefault" :config-info="configInfo" v-model="config.samplers"></settings-modal-short-input>
|
||||||
|
<!-- Samplers -->
|
||||||
<template v-for="configKey in ['dynatemp_range', 'dynatemp_exponent', 'typical_p', 'xtc_probability', 'xtc_threshold']">
|
<template v-for="configKey in ['dynatemp_range', 'dynatemp_exponent', 'typical_p', 'xtc_probability', 'xtc_threshold']">
|
||||||
<settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]" />
|
<settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]" />
|
||||||
</template>
|
</template>
|
||||||
@ -231,6 +234,7 @@
|
|||||||
<summary class="collapse-title font-bold">Advanced config</summary>
|
<summary class="collapse-title font-bold">Advanced config</summary>
|
||||||
<div class="collapse-content">
|
<div class="collapse-content">
|
||||||
<label class="form-control mb-2">
|
<label class="form-control mb-2">
|
||||||
|
<!-- Custom parameters input -->
|
||||||
<div class="label inline">Custom JSON config (For more info, refer to <a class="underline" href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md" target="_blank" rel="noopener noreferrer">server documentation</a>)</div>
|
<div class="label inline">Custom JSON config (For more info, refer to <a class="underline" href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md" target="_blank" rel="noopener noreferrer">server documentation</a>)</div>
|
||||||
<textarea class="textarea textarea-bordered h-24" placeholder="Example: { "mirostat": 1, "min_p": 0.1 }" v-model="config.custom"></textarea>
|
<textarea class="textarea textarea-bordered h-24" placeholder="Example: { "mirostat": 1, "min_p": 0.1 }" v-model="config.custom"></textarea>
|
||||||
</label>
|
</label>
|
||||||
@ -253,7 +257,7 @@
|
|||||||
<label class="input input-bordered join-item grow flex items-center gap-2 mb-2">
|
<label class="input input-bordered join-item grow flex items-center gap-2 mb-2">
|
||||||
<!-- Show help message on hovering on the input label -->
|
<!-- Show help message on hovering on the input label -->
|
||||||
<div class="dropdown dropdown-hover">
|
<div class="dropdown dropdown-hover">
|
||||||
<div tabindex="0" role="button" class="font-bold">{{ configKey }}</div>
|
<div tabindex="0" role="button" class="font-bold">{{ label || configKey }}</div>
|
||||||
<div class="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
|
<div class="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
|
||||||
{{ configInfo[configKey] || '(no help message available)' }}
|
{{ configInfo[configKey] || '(no help message available)' }}
|
||||||
</div>
|
</div>
|
||||||
@ -282,6 +286,7 @@
|
|||||||
apiKey: '',
|
apiKey: '',
|
||||||
systemMessage: 'You are a helpful assistant.',
|
systemMessage: 'You are a helpful assistant.',
|
||||||
// make sure these default values are in sync with `common.h`
|
// make sure these default values are in sync with `common.h`
|
||||||
|
samplers: 'dkypmxt',
|
||||||
temperature: 0.8,
|
temperature: 0.8,
|
||||||
dynatemp_range: 0.0,
|
dynatemp_range: 0.0,
|
||||||
dynatemp_exponent: 1.0,
|
dynatemp_exponent: 1.0,
|
||||||
@ -305,6 +310,7 @@
|
|||||||
const CONFIG_INFO = {
|
const CONFIG_INFO = {
|
||||||
apiKey: 'Set the API Key if you are using --api-key option for the server.',
|
apiKey: 'Set the API Key if you are using --api-key option for the server.',
|
||||||
systemMessage: 'The starting message that defines how model should behave.',
|
systemMessage: 'The starting message that defines how model should behave.',
|
||||||
|
samplers: 'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
|
||||||
temperature: 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
|
temperature: 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
|
||||||
dynatemp_range: 'Addon for the temperature sampler. The added value to the range of dynamic temperature, which adjusts probabilities by entropy of tokens.',
|
dynatemp_range: 'Addon for the temperature sampler. The added value to the range of dynamic temperature, which adjusts probabilities by entropy of tokens.',
|
||||||
dynatemp_exponent: 'Addon for the temperature sampler. Smoothes out the probability redistribution based on the most probable token.',
|
dynatemp_exponent: 'Addon for the temperature sampler. Smoothes out the probability redistribution based on the most probable token.',
|
||||||
@ -352,10 +358,16 @@
|
|||||||
{ props: ["source"] }
|
{ props: ["source"] }
|
||||||
);
|
);
|
||||||
|
|
||||||
// inout field to be used by settings modal
|
// input field to be used by settings modal
|
||||||
const SettingsModalShortInput = defineComponent({
|
const SettingsModalShortInput = defineComponent({
|
||||||
template: document.getElementById('settings-modal-short-input').innerHTML,
|
template: document.getElementById('settings-modal-short-input').innerHTML,
|
||||||
props: ['configKey', 'configDefault', 'configInfo', 'modelValue'],
|
props: {
|
||||||
|
label: { type: String, required: false },
|
||||||
|
configKey: String,
|
||||||
|
configDefault: Object,
|
||||||
|
configInfo: Object,
|
||||||
|
modelValue: [Object, String, Number],
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
// conversations are stored in localStorage
|
// conversations are stored in localStorage
|
||||||
@ -546,6 +558,7 @@
|
|||||||
],
|
],
|
||||||
stream: true,
|
stream: true,
|
||||||
cache_prompt: true,
|
cache_prompt: true,
|
||||||
|
samplers: this.config.samplers,
|
||||||
temperature: this.config.temperature,
|
temperature: this.config.temperature,
|
||||||
dynatemp_range: this.config.dynatemp_range,
|
dynatemp_range: this.config.dynatemp_range,
|
||||||
dynatemp_exponent: this.config.dynatemp_exponent,
|
dynatemp_exponent: this.config.dynatemp_exponent,
|
||||||
|
@ -927,14 +927,22 @@ struct server_context {
|
|||||||
|
|
||||||
{
|
{
|
||||||
const auto & samplers = data.find("samplers");
|
const auto & samplers = data.find("samplers");
|
||||||
if (samplers != data.end() && samplers->is_array()) {
|
if (samplers != data.end()) {
|
||||||
std::vector<std::string> sampler_names;
|
if (samplers->is_array()) {
|
||||||
for (const auto & name : *samplers) {
|
std::vector<std::string> sampler_names;
|
||||||
if (name.is_string()) {
|
for (const auto & name : *samplers) {
|
||||||
sampler_names.emplace_back(name);
|
if (name.is_string()) {
|
||||||
|
sampler_names.emplace_back(name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
slot.sparams.samplers = common_sampler_types_from_names(sampler_names, false);
|
||||||
|
} else if (samplers->is_string()){
|
||||||
|
std::string sampler_string;
|
||||||
|
for (const auto & name : *samplers) {
|
||||||
|
sampler_string += name;
|
||||||
|
}
|
||||||
|
slot.sparams.samplers = common_sampler_types_from_chars(sampler_string);
|
||||||
}
|
}
|
||||||
slot.sparams.samplers = common_sampler_types_from_names(sampler_names, false);
|
|
||||||
} else {
|
} else {
|
||||||
slot.sparams.samplers = default_sparams.samplers;
|
slot.sparams.samplers = default_sparams.samplers;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user