mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 10:54:36 +00:00
Merge branch 'master' of https://github.com/piDack/llama.cpp into support_glm_edge_model
This commit is contained in:
commit
a249dc0fbb
@ -24,6 +24,16 @@ insert_final_newline = unset
|
||||
[examples/server/public/*]
|
||||
indent_size = 2
|
||||
|
||||
[examples/server/public/deps_*]
|
||||
trim_trailing_whitespace = unset
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
|
||||
[examples/server/deps_*]
|
||||
trim_trailing_whitespace = unset
|
||||
indent_style = unset
|
||||
indent_size = unset
|
||||
|
||||
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
|
||||
indent_style = tab
|
||||
|
||||
|
17
Makefile
17
Makefile
@ -1455,22 +1455,13 @@ llama-server: \
|
||||
examples/server/server.cpp \
|
||||
examples/server/utils.hpp \
|
||||
examples/server/httplib.h \
|
||||
examples/server/colorthemes.css.hpp \
|
||||
examples/server/style.css.hpp \
|
||||
examples/server/theme-beeninorder.css.hpp \
|
||||
examples/server/theme-ketivah.css.hpp \
|
||||
examples/server/theme-mangotango.css.hpp \
|
||||
examples/server/theme-playground.css.hpp \
|
||||
examples/server/theme-polarnight.css.hpp \
|
||||
examples/server/theme-snowstorm.css.hpp \
|
||||
examples/server/index.html.hpp \
|
||||
examples/server/index-new.html.hpp \
|
||||
examples/server/index.js.hpp \
|
||||
examples/server/completion.js.hpp \
|
||||
examples/server/system-prompts.js.hpp \
|
||||
examples/server/prompt-formats.js.hpp \
|
||||
examples/server/json-schema-to-grammar.mjs.hpp \
|
||||
examples/server/loading.html.hpp \
|
||||
examples/server/deps_daisyui.min.css.hpp \
|
||||
examples/server/deps_markdown-it.js.hpp \
|
||||
examples/server/deps_tailwindcss.js.hpp \
|
||||
examples/server/deps_vue.esm-browser.js.hpp \
|
||||
common/json.hpp \
|
||||
common/stb_image.h \
|
||||
$(OBJ_ALL)
|
||||
|
@ -3748,10 +3748,7 @@ class JaisModel(Model):
|
||||
|
||||
# Embeddings scale
|
||||
self.embeddings_scale = 1.0
|
||||
# note: For some JAIS flavors, output is tied to (same as) wte in original model
|
||||
self.output_is_wte = False
|
||||
if 'mup_embeddings_scale' in self.hparams:
|
||||
self.output_is_wte = True # Hack (?)
|
||||
self.embeddings_scale = self.hparams['mup_embeddings_scale']
|
||||
elif 'embeddings_scale' in self.hparams:
|
||||
self.embeddings_scale = self.hparams['embeddings_scale']
|
||||
@ -3808,10 +3805,7 @@ class JaisModel(Model):
|
||||
|
||||
if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
|
||||
tensors.append((new_name, data_torch * self.embeddings_scale))
|
||||
if self.output_is_wte:
|
||||
tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
|
||||
elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
|
||||
assert not self.output_is_wte
|
||||
tensors.append((new_name, data_torch * self.width_scale))
|
||||
else:
|
||||
tensors.append((new_name, data_torch))
|
||||
|
@ -15,22 +15,13 @@ set(TARGET_SRCS
|
||||
httplib.h
|
||||
)
|
||||
set(PUBLIC_ASSETS
|
||||
colorthemes.css
|
||||
style.css
|
||||
theme-beeninorder.css
|
||||
theme-ketivah.css
|
||||
theme-mangotango.css
|
||||
theme-playground.css
|
||||
theme-polarnight.css
|
||||
theme-snowstorm.css
|
||||
index.html
|
||||
index-new.html
|
||||
index.js
|
||||
completion.js
|
||||
system-prompts.js
|
||||
prompt-formats.js
|
||||
json-schema-to-grammar.mjs
|
||||
loading.html
|
||||
deps_daisyui.min.css
|
||||
deps_markdown-it.js
|
||||
deps_tailwindcss.js
|
||||
deps_vue.esm-browser.js
|
||||
)
|
||||
|
||||
foreach(asset ${PUBLIC_ASSETS})
|
||||
|
@ -928,6 +928,16 @@ Apart from error types supported by OAI, we also have custom types that are spec
|
||||
}
|
||||
```
|
||||
|
||||
### Legacy completion web UI
|
||||
|
||||
A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggerganov/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./examples/server/public_legacy`
|
||||
|
||||
For example:
|
||||
|
||||
```sh
|
||||
./llama-server -m my_model.gguf -c 8192 --path ./examples/server/public_legacy
|
||||
```
|
||||
|
||||
### Extending or building alternative Web Front End
|
||||
|
||||
You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.
|
||||
|
@ -1,7 +1,7 @@
|
||||
import * as readline from 'node:readline'
|
||||
import { stdin, stdout } from 'node:process'
|
||||
import { readFileSync } from 'node:fs'
|
||||
import { SchemaConverter } from './public/json-schema-to-grammar.mjs'
|
||||
import { SchemaConverter } from './public_legacy/json-schema-to-grammar.mjs'
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
const grammarJsonSchemaFile = args.find(
|
||||
|
@ -6,5 +6,20 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
PUBLIC=$DIR/public
|
||||
|
||||
echo "download js bundle files"
|
||||
curl https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks > $PUBLIC/index.js
|
||||
echo >> $PUBLIC/index.js # add newline
|
||||
|
||||
# Note for contributors: Always pin to a specific version "maj.min.patch" to avoid breaking the CI
|
||||
|
||||
curl -L https://cdn.tailwindcss.com/3.4.14 > $PUBLIC/deps_tailwindcss.js
|
||||
echo >> $PUBLIC/deps_tailwindcss.js # add newline
|
||||
|
||||
curl -L https://cdnjs.cloudflare.com/ajax/libs/daisyui/4.12.14/styled.min.css > $PUBLIC/deps_daisyui.min.css
|
||||
curl -L https://cdnjs.cloudflare.com/ajax/libs/daisyui/4.12.14/themes.min.css >> $PUBLIC/deps_daisyui.min.css
|
||||
echo >> $PUBLIC/deps_daisyui.min.css # add newline
|
||||
|
||||
curl -L https://unpkg.com/vue@3.5.12/dist/vue.esm-browser.js > $PUBLIC/deps_vue.esm-browser.js
|
||||
echo >> $PUBLIC/deps_vue.esm-browser.js # add newline
|
||||
|
||||
curl -L https://cdnjs.cloudflare.com/ajax/libs/markdown-it/13.0.2/markdown-it.js > $PUBLIC/deps_markdown-it.js
|
||||
echo >> $PUBLIC/deps_markdown-it.js # add newline
|
||||
|
||||
ls -lah $PUBLIC
|
||||
|
@ -1,12 +1,16 @@
|
||||
const paramDefaults = {
|
||||
stream: true,
|
||||
n_predict: 500,
|
||||
temperature: 0.2,
|
||||
stop: ["</s>"]
|
||||
};
|
||||
|
||||
let generation_settings = null;
|
||||
|
||||
export class CompletionError extends Error {
|
||||
constructor(message, name, data) {
|
||||
super(message);
|
||||
this.name = name;
|
||||
}
|
||||
};
|
||||
|
||||
// Completes the prompt as a generator. Recommended for most use cases.
|
||||
//
|
||||
@ -29,7 +33,7 @@ export async function* llama(prompt, params = {}, config = {}) {
|
||||
|
||||
const completionParams = { ...paramDefaults, ...params, prompt };
|
||||
|
||||
const response = await fetch(`${api_url}/completion`, {
|
||||
const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify(completionParams),
|
||||
headers: {
|
||||
@ -41,6 +45,18 @@ export async function* llama(prompt, params = {}, config = {}) {
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
const status = response.status;
|
||||
if (status !== 200) {
|
||||
try {
|
||||
const body = await response.json();
|
||||
if (body && body.error && body.error.message) {
|
||||
throw new CompletionError(body.error.message, 'ServerError');
|
||||
}
|
||||
} catch (err) {
|
||||
throw new CompletionError(err.message, 'ServerError');
|
||||
}
|
||||
}
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
@ -78,7 +94,12 @@ export async function* llama(prompt, params = {}, config = {}) {
|
||||
for (const line of lines) {
|
||||
const match = regex.exec(line);
|
||||
if (match) {
|
||||
result[match[1]] = match[2]
|
||||
result[match[1]] = match[2];
|
||||
if (result.data === '[DONE]') {
|
||||
cont = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// since we know this is llama.cpp, let's just decode the json in data
|
||||
if (result.data) {
|
||||
result.data = JSON.parse(result.data);
|
||||
|
13
examples/server/public/deps_daisyui.min.css
vendored
Normal file
13
examples/server/public/deps_daisyui.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
8442
examples/server/public/deps_markdown-it.js
Normal file
8442
examples/server/public/deps_markdown-it.js
Normal file
File diff suppressed because it is too large
Load Diff
82
examples/server/public/deps_tailwindcss.js
Normal file
82
examples/server/public/deps_tailwindcss.js
Normal file
File diff suppressed because one or more lines are too long
18160
examples/server/public/deps_vue.esm-browser.js
Normal file
18160
examples/server/public/deps_vue.esm-browser.js
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
209
examples/server/public_legacy/completion.js
Normal file
209
examples/server/public_legacy/completion.js
Normal file
@ -0,0 +1,209 @@
|
||||
const paramDefaults = {
|
||||
stream: true,
|
||||
n_predict: 500,
|
||||
temperature: 0.2,
|
||||
stop: ["</s>"]
|
||||
};
|
||||
|
||||
let generation_settings = null;
|
||||
|
||||
|
||||
// Completes the prompt as a generator. Recommended for most use cases.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import { llama } from '/completion.js'
|
||||
//
|
||||
// const request = llama("Tell me a joke", {n_predict: 800})
|
||||
// for await (const chunk of request) {
|
||||
// document.write(chunk.data.content)
|
||||
// }
|
||||
//
|
||||
export async function* llama(prompt, params = {}, config = {}) {
|
||||
let controller = config.controller;
|
||||
const api_url = config.api_url?.replace(/\/+$/, '') || "";
|
||||
|
||||
if (!controller) {
|
||||
controller = new AbortController();
|
||||
}
|
||||
|
||||
const completionParams = { ...paramDefaults, ...params, prompt };
|
||||
|
||||
const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify(completionParams),
|
||||
headers: {
|
||||
'Connection': 'keep-alive',
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'text/event-stream',
|
||||
...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
|
||||
},
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
let content = "";
|
||||
let leftover = ""; // Buffer for partially read lines
|
||||
|
||||
try {
|
||||
let cont = true;
|
||||
|
||||
while (cont) {
|
||||
const result = await reader.read();
|
||||
if (result.done) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Add any leftover data to the current chunk of data
|
||||
const text = leftover + decoder.decode(result.value);
|
||||
|
||||
// Check if the last character is a line break
|
||||
const endsWithLineBreak = text.endsWith('\n');
|
||||
|
||||
// Split the text into lines
|
||||
let lines = text.split('\n');
|
||||
|
||||
// If the text doesn't end with a line break, then the last line is incomplete
|
||||
// Store it in leftover to be added to the next chunk of data
|
||||
if (!endsWithLineBreak) {
|
||||
leftover = lines.pop();
|
||||
} else {
|
||||
leftover = ""; // Reset leftover if we have a line break at the end
|
||||
}
|
||||
|
||||
// Parse all sse events and add them to result
|
||||
const regex = /^(\S+):\s(.*)$/gm;
|
||||
for (const line of lines) {
|
||||
const match = regex.exec(line);
|
||||
if (match) {
|
||||
result[match[1]] = match[2];
|
||||
if (result.data === '[DONE]') {
|
||||
cont = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// since we know this is llama.cpp, let's just decode the json in data
|
||||
if (result.data) {
|
||||
result.data = JSON.parse(result.data);
|
||||
content += result.data.content;
|
||||
|
||||
// yield
|
||||
yield result;
|
||||
|
||||
// if we got a stop token from server, we will break here
|
||||
if (result.data.stop) {
|
||||
if (result.data.generation_settings) {
|
||||
generation_settings = result.data.generation_settings;
|
||||
}
|
||||
cont = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result.error) {
|
||||
try {
|
||||
result.error = JSON.parse(result.error);
|
||||
if (result.error.message.includes('slot unavailable')) {
|
||||
// Throw an error to be caught by upstream callers
|
||||
throw new Error('slot unavailable');
|
||||
} else {
|
||||
console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
|
||||
}
|
||||
} catch(e) {
|
||||
console.error(`llama.cpp error ${result.error}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
if (e.name !== 'AbortError') {
|
||||
console.error("llama error: ", e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
finally {
|
||||
controller.abort();
|
||||
}
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
// Call llama, return an event target that you can subscribe to
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import { llamaEventTarget } from '/completion.js'
|
||||
//
|
||||
// const conn = llamaEventTarget(prompt)
|
||||
// conn.addEventListener("message", (chunk) => {
|
||||
// document.write(chunk.detail.content)
|
||||
// })
|
||||
//
|
||||
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
|
||||
const eventTarget = new EventTarget();
|
||||
(async () => {
|
||||
let content = "";
|
||||
for await (const chunk of llama(prompt, params, config)) {
|
||||
if (chunk.data) {
|
||||
content += chunk.data.content;
|
||||
eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
|
||||
}
|
||||
if (chunk.data.generation_settings) {
|
||||
eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
|
||||
}
|
||||
if (chunk.data.timings) {
|
||||
eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
|
||||
}
|
||||
}
|
||||
eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
|
||||
})();
|
||||
return eventTarget;
|
||||
}
|
||||
|
||||
// Call llama, return a promise that resolves to the completed text. This does not support streaming
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// llamaPromise(prompt).then((content) => {
|
||||
// document.write(content)
|
||||
// })
|
||||
//
|
||||
// or
|
||||
//
|
||||
// const content = await llamaPromise(prompt)
|
||||
// document.write(content)
|
||||
//
|
||||
export const llamaPromise = (prompt, params = {}, config = {}) => {
|
||||
return new Promise(async (resolve, reject) => {
|
||||
let content = "";
|
||||
try {
|
||||
for await (const chunk of llama(prompt, params, config)) {
|
||||
content += chunk.data.content;
|
||||
}
|
||||
resolve(content);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* (deprecated)
|
||||
*/
|
||||
export const llamaComplete = async (params, controller, callback) => {
|
||||
for await (const chunk of llama(params.prompt, params, { controller })) {
|
||||
callback(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the model info from the server. This is useful for getting the context window and so on.
|
||||
export const llamaModelInfo = async (config = {}) => {
|
||||
if (!generation_settings) {
|
||||
const api_url = config.api_url?.replace(/\/+$/, '') || "";
|
||||
const props = await fetch(`${api_url}/props`).then(r => r.json());
|
||||
generation_settings = props.default_generation_settings;
|
||||
}
|
||||
return generation_settings;
|
||||
}
|
Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 4.0 KiB |
1303
examples/server/public_legacy/index.html
Normal file
1303
examples/server/public_legacy/index.html
Normal file
File diff suppressed because it is too large
Load Diff
12
examples/server/public_legacy/loading.html
Normal file
12
examples/server/public_legacy/loading.html
Normal file
@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="refresh" content="5">
|
||||
</head>
|
||||
<body>
|
||||
<div id="loading">
|
||||
The model is loading. Please wait.<br/>
|
||||
The user interface will appear soon.
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@ -14,22 +14,13 @@
|
||||
#define MIMETYPE_JSON "application/json; charset=utf-8"
|
||||
|
||||
// auto generated files (update with ./deps.sh)
|
||||
#include "colorthemes.css.hpp"
|
||||
#include "style.css.hpp"
|
||||
#include "theme-beeninorder.css.hpp"
|
||||
#include "theme-ketivah.css.hpp"
|
||||
#include "theme-mangotango.css.hpp"
|
||||
#include "theme-playground.css.hpp"
|
||||
#include "theme-polarnight.css.hpp"
|
||||
#include "theme-snowstorm.css.hpp"
|
||||
#include "index.html.hpp"
|
||||
#include "index-new.html.hpp"
|
||||
#include "index.js.hpp"
|
||||
#include "completion.js.hpp"
|
||||
#include "system-prompts.js.hpp"
|
||||
#include "prompt-formats.js.hpp"
|
||||
#include "json-schema-to-grammar.mjs.hpp"
|
||||
#include "loading.html.hpp"
|
||||
#include "deps_daisyui.min.css.hpp"
|
||||
#include "deps_markdown-it.js.hpp"
|
||||
#include "deps_tailwindcss.js.hpp"
|
||||
#include "deps_vue.esm-browser.js.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
@ -2285,16 +2276,6 @@ int main(int argc, char ** argv) {
|
||||
std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
|
||||
|
||||
svr->set_default_headers({{"Server", "llama.cpp"}});
|
||||
|
||||
// CORS preflight
|
||||
svr->Options(R"(.*)", [](const httplib::Request &, httplib::Response & res) {
|
||||
// Access-Control-Allow-Origin is already set by middleware
|
||||
res.set_header("Access-Control-Allow-Credentials", "true");
|
||||
res.set_header("Access-Control-Allow-Methods", "POST");
|
||||
res.set_header("Access-Control-Allow-Headers", "*");
|
||||
return res.set_content("", "text/html"); // blank response, no data
|
||||
});
|
||||
|
||||
svr->set_logger(log_server_request);
|
||||
|
||||
auto res_error = [](httplib::Response & res, const json & error_data) {
|
||||
@ -2407,6 +2388,14 @@ int main(int argc, char ** argv) {
|
||||
// register server middlewares
|
||||
svr->set_pre_routing_handler([&middleware_validate_api_key, &middleware_server_state](const httplib::Request & req, httplib::Response & res) {
|
||||
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
||||
// If this is OPTIONS request, skip validation because browsers don't include Authorization header
|
||||
if (req.method == "OPTIONS") {
|
||||
res.set_header("Access-Control-Allow-Credentials", "true");
|
||||
res.set_header("Access-Control-Allow-Methods", "GET, POST");
|
||||
res.set_header("Access-Control-Allow-Headers", "*");
|
||||
res.set_content("", "text/html"); // blank response, no data
|
||||
return httplib::Server::HandlerResponse::Handled; // skip further processing
|
||||
}
|
||||
if (!middleware_server_state(req, res)) {
|
||||
return httplib::Server::HandlerResponse::Handled;
|
||||
}
|
||||
@ -3116,33 +3105,19 @@ int main(int argc, char ** argv) {
|
||||
// register static assets routes
|
||||
if (!params.public_path.empty()) {
|
||||
// Set the base directory for serving static files
|
||||
svr->set_base_dir(params.public_path);
|
||||
}
|
||||
|
||||
if (!params.api_keys.empty()) {
|
||||
// for now, if API key is set, web UI is unusable
|
||||
svr->Get("/", [&](const httplib::Request &, httplib::Response & res) {
|
||||
return res.set_content("Web UI is disabled because API key is set.", "text/html; charset=utf-8");
|
||||
});
|
||||
bool is_found = svr->set_mount_point("/", params.public_path);
|
||||
if (!is_found) {
|
||||
LOG_ERR("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
// using embedded static files
|
||||
svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
|
||||
svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/json-schema-to-grammar.mjs", handle_static_file(json_schema_to_grammar_mjs, json_schema_to_grammar_mjs_len, "text/javascript; charset=utf-8"));
|
||||
|
||||
// add new-ui files
|
||||
svr->Get("/colorthemes.css", handle_static_file(colorthemes_css, colorthemes_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/style.css", handle_static_file(style_css, style_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/theme-beeninorder.css", handle_static_file(theme_beeninorder_css, theme_beeninorder_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/theme-ketivah.css", handle_static_file(theme_ketivah_css, theme_ketivah_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/theme-mangotango.css", handle_static_file(theme_mangotango_css, theme_mangotango_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/theme-playground.css", handle_static_file(theme_playground_css, theme_playground_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/theme-polarnight.css", handle_static_file(theme_polarnight_css, theme_polarnight_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/theme-snowstorm.css", handle_static_file(theme_snowstorm_css, theme_snowstorm_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/index-new.html", handle_static_file(index_new_html, index_new_html_len, "text/html; charset=utf-8"));
|
||||
svr->Get("/system-prompts.js", handle_static_file(system_prompts_js, system_prompts_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/prompt-formats.js", handle_static_file(prompt_formats_js, prompt_formats_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
|
||||
svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/deps_daisyui.min.css", handle_static_file(deps_daisyui_min_css, deps_daisyui_min_css_len, "text/css; charset=utf-8"));
|
||||
svr->Get("/deps_markdown-it.js", handle_static_file(deps_markdown_it_js, deps_markdown_it_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/deps_tailwindcss.js", handle_static_file(deps_tailwindcss_js, deps_tailwindcss_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/deps_vue.esm-browser.js", handle_static_file(deps_vue_esm_browser_js, deps_vue_esm_browser_js_len, "text/javascript; charset=utf-8"));
|
||||
}
|
||||
|
||||
// register API routes
|
||||
|
@ -64,5 +64,5 @@ Feature: Security
|
||||
| localhost | Access-Control-Allow-Origin | localhost |
|
||||
| web.mydomain.fr | Access-Control-Allow-Origin | web.mydomain.fr |
|
||||
| origin | Access-Control-Allow-Credentials | true |
|
||||
| web.mydomain.fr | Access-Control-Allow-Methods | POST |
|
||||
| web.mydomain.fr | Access-Control-Allow-Methods | GET, POST |
|
||||
| web.mydomain.fr | Access-Control-Allow-Headers | * |
|
||||
|
@ -218,12 +218,12 @@ include(CMakePackageConfigHelpers)
|
||||
# all public headers
|
||||
set(GGML_PUBLIC_HEADERS
|
||||
include/ggml.h
|
||||
include/ggml-cpu.h
|
||||
include/ggml-alloc.h
|
||||
include/ggml-backend.h
|
||||
include/ggml-blas.h
|
||||
include/ggml-cann.h
|
||||
include/ggml-cuda.h
|
||||
include/ggml.h
|
||||
include/ggml-kompute.h
|
||||
include/ggml-metal.h
|
||||
include/ggml-rpc.h
|
||||
|
@ -124,7 +124,7 @@ You can use GBNF grammars:
|
||||
- In [llama-cli](../examples/main), passed as the `--json` / `-j` flag
|
||||
- To convert to a grammar ahead of time:
|
||||
- in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
|
||||
- in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)
|
||||
- in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)
|
||||
|
||||
Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggerganov/llama.cpp/pull/5978, https://github.com/ggerganov/llama.cpp/pull/6659 & https://github.com/ggerganov/llama.cpp/pull/6555).
|
||||
|
||||
|
@ -114,46 +114,22 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
|
||||
# replace filenames:
|
||||
#
|
||||
# CMakelists.txt -> ggml/CMakeLists.txt
|
||||
# src/CMakeLists.txt -> ggml/src/CMakeLists.txt
|
||||
# cmake/FindSIMD.cmake -> ggml/cmake/FindSIMD.cmake
|
||||
# CMakelists.txt -> ggml/CMakeLists.txt
|
||||
# src/CMakeLists.txt -> ggml/src/CMakeLists.txt
|
||||
# cmake/FindSIMD.cmake -> ggml/cmake/FindSIMD.cmake
|
||||
#
|
||||
# src/ggml.c -> ggml/src/ggml.c
|
||||
# src/ggml-aarch64.c -> ggml/src/ggml-aarch64.c
|
||||
# src/ggml-aarch64.h -> ggml/src/ggml-aarch64.h
|
||||
# src/ggml-alloc.c -> ggml/src/ggml-alloc.c
|
||||
# src/ggml-amx/* -> ggml/src/ggml-amx/
|
||||
# src/ggml-amx.cpp -> ggml/src/ggml-amx.cpp
|
||||
# src/ggml-backend-impl.h -> ggml/src/ggml-backend-impl.h
|
||||
# src/ggml-backend.cpp -> ggml/src/ggml-backend.cpp
|
||||
# src/ggml-cann/* -> ggml/src/ggml-cann/
|
||||
# src/ggml-cann.cpp -> ggml/src/ggml-cann.cpp
|
||||
# src/ggml-common.h -> ggml/src/ggml-common.h
|
||||
# src/ggml-cuda/* -> ggml/src/ggml-cuda/
|
||||
# src/ggml-cuda.cu -> ggml/src/ggml-cuda.cu
|
||||
# src/ggml-impl.h -> ggml/src/ggml-impl.h
|
||||
# src/ggml-kompute.cpp -> ggml/src/ggml-kompute.cpp
|
||||
# src/ggml-metal.m -> ggml/src/ggml-metal.m
|
||||
# src/ggml-quants.c -> ggml/src/ggml-quants.c
|
||||
# src/ggml-quants.h -> ggml/src/ggml-quants.h
|
||||
# src/ggml-rpc.cpp -> ggml/src/ggml-rpc.cpp
|
||||
# src/ggml-sycl/* -> ggml/src/ggml-sycl/
|
||||
# src/ggml-sycl.cpp -> ggml/src/ggml-sycl.cpp
|
||||
# src/ggml-vulkan.cpp -> ggml/src/ggml-vulkan.cpp
|
||||
# src/vulkan-shaders/* -> ggml/src/vulkan-shaders/
|
||||
# src/ggml*.c -> ggml/src/ggml*.c
|
||||
# src/ggml*.cpp -> ggml/src/ggml*.cpp
|
||||
# src/ggml*.h -> ggml/src/ggml*.h
|
||||
# src/ggml*.cu -> ggml/src/ggml*.cu
|
||||
# src/ggml*.m -> ggml/src/ggml*.m
|
||||
# src/ggml-amx/* -> ggml/src/ggml-amx/
|
||||
# src/ggml-cann/* -> ggml/src/ggml-cann/
|
||||
# src/ggml-cuda/* -> ggml/src/ggml-cuda/
|
||||
# src/ggml-sycl/* -> ggml/src/ggml-sycl/
|
||||
# src/vulkan-shaders/* -> ggml/src/vulkan-shaders/
|
||||
#
|
||||
# include/ggml.h -> ggml/include/ggml.h
|
||||
# include/ggml-alloc.h -> ggml/include/ggml-alloc.h
|
||||
# include/ggml-amx.h -> ggml/include/ggml-amx.h
|
||||
# include/ggml-backend.h -> ggml/include/ggml-backend.h
|
||||
# include/ggml-blas.h -> ggml/include/ggml-blas.h
|
||||
# include/ggml-cann.h -> ggml/include/ggml-cann.h
|
||||
# include/ggml-cuda.h -> ggml/include/ggml-cuda.h
|
||||
# include/ggml-kompute.h -> ggml/include/ggml-kompute.h
|
||||
# include/ggml-metal.h -> ggml/include/ggml-metal.h
|
||||
# include/ggml-rpc.h -> ggml/include/ggml-rpc.h
|
||||
# include/ggml-sycl.h -> ggml/include/ggml-sycl.h
|
||||
# include/ggml-vulkan.h -> ggml/include/ggml-vulkan.h
|
||||
# include/ggml*.h -> ggml/include/ggml*.h
|
||||
#
|
||||
# tests/test-opt.cpp -> tests/test-opt.cpp
|
||||
# tests/test-grad0.cpp -> tests/test-grad0.cpp
|
||||
@ -168,41 +144,17 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
-e 's/([[:space:]]|[ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)cmake\/FindSIMD.cmake/\1ggml\/cmake\/FindSIMD.cmake/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml\.c/\1ggml\/src\/ggml.c/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-aarch64\.c/\1ggml\/src\/ggml-aarch64.c/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-aarch64\.h/\1ggml\/src\/ggml-aarch64.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-alloc\.c/\1ggml\/src\/ggml-alloc.c/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.c/\1ggml\/src\/ggml\1.c/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cpp/\1ggml\/src\/ggml\1.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.h/\1ggml\/src\/ggml\1.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cu/\1ggml\/src\/ggml\1.cu/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.m/\1ggml\/src\/ggml\1.m/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-amx\//\1ggml\/src\/ggml-amx\//g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-amx\.cpp/\1ggml\/src\/ggml-amx.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-backend-impl\.h/\1ggml\/src\/ggml-backend-impl.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-backend\.cpp/\1ggml\/src\/ggml-backend.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-cann\//\1ggml\/src\/ggml-cann\//g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-cann\.cpp/\1ggml\/src\/ggml-cann.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-common\.h/\1ggml\/src\/ggml-common.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\.cu/\1ggml\/src\/ggml-cuda.cu/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-impl\.h/\1ggml\/src\/ggml-impl.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-kompute\.cpp/\1ggml\/src\/ggml-kompute.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-metal\.m/\1ggml\/src\/ggml-metal.m/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-quants\.c/\1ggml\/src\/ggml-quants.c/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-quants\.h/\1ggml\/src\/ggml-quants.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-rpc\.cpp/\1ggml\/src\/ggml-rpc.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\.cpp/\1ggml\/src\/ggml-sycl.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/ggml-vulkan\.cpp/\1ggml\/src\/ggml-vulkan.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)src\/vulkan-shaders\//\1ggml\/src\/vulkan-shaders\//g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml\.h/\1ggml\/include\/ggml.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-alloc\.h/\1ggml\/include\/ggml-alloc.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-amx\.h/\1ggml\/include\/ggml-amx.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-backend\.h/\1ggml\/include\/ggml-backend.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-blas\.h/\1ggml\/include\/ggml-blas.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-cann\.h/\1ggml\/include\/ggml-cann.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-cuda\.h/\1ggml\/include\/ggml-cuda.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-kompute\.h/\1ggml\/include\/ggml-kompute.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-metal\.h/\1ggml\/include\/ggml-metal.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-rpc\.h/\1ggml\/include\/ggml-rpc.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-sycl\.h/\1ggml\/include\/ggml-sycl.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml-vulkan\.h/\1ggml\/include\/ggml-vulkan.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\1.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)examples\/common\.h/\1examples\/common.h/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)examples\/common\.cpp/\1examples\/common.cpp/g' \
|
||||
-e 's/([[:space:]]|[ab]\/)examples\/common-ggml\.h/\1examples\/common-ggml.h/g' \
|
||||
|
@ -1 +1 @@
|
||||
a099cb514d6687e436a5a423d1fb0448be0feb20
|
||||
89952d649e0c5cabbb9ff8c4906f5a843a789fb2
|
||||
|
@ -4,43 +4,18 @@ cp -rpv ../ggml/CMakeLists.txt ./ggml/CMakeLists.txt
|
||||
cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt
|
||||
cp -rpv ../ggml/cmake/FindSIMD.cmake ./ggml/cmake/FindSIMD.cmake
|
||||
|
||||
cp -rpv ../ggml/src/ggml.c ./ggml/src/ggml.c
|
||||
cp -rpv ../ggml/src/ggml-aarch64.c ./ggml/src/ggml-aarch64.c
|
||||
cp -rpv ../ggml/src/ggml-aarch64.h ./ggml/src/ggml-aarch64.h
|
||||
cp -rpv ../ggml/src/ggml-alloc.c ./ggml/src/ggml-alloc.c
|
||||
cp -rpv ../ggml/src/ggml-amx/* ./ggml/src/ggml-amx/
|
||||
cp -rpv ../ggml/src/ggml-amx.cpp ./ggml/src/ggml-amx.cpp
|
||||
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml/src/ggml-backend-impl.h
|
||||
cp -rpv ../ggml/src/ggml-backend.cpp ./ggml/src/ggml-backend.cpp
|
||||
cp -rpv ../ggml/src/ggml-cann/* ./ggml/src/ggml-cann/
|
||||
cp -rpv ../ggml/src/ggml-cann.cpp ./ggml/src/ggml-cann.cpp
|
||||
cp -rpv ../ggml/src/ggml-common.h ./ggml/src/ggml-common.h
|
||||
cp -rpv ../ggml/src/ggml-cuda/* ./ggml/src/ggml-cuda/
|
||||
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml/src/ggml-cuda.cu
|
||||
cp -rpv ../ggml/src/ggml-impl.h ./ggml/src/ggml-impl.h
|
||||
cp -rpv ../ggml/src/ggml-kompute.cpp ./ggml/src/ggml-kompute.cpp
|
||||
cp -rpv ../ggml/src/ggml-metal.m ./ggml/src/ggml-metal.m
|
||||
cp -rpv ../ggml/src/ggml-metal.metal ./ggml/src/ggml-metal.metal
|
||||
cp -rpv ../ggml/src/ggml-quants.c ./ggml/src/ggml-quants.c
|
||||
cp -rpv ../ggml/src/ggml-quants.h ./ggml/src/ggml-quants.h
|
||||
cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml/src/ggml-rpc.cpp
|
||||
cp -rpv ../ggml/src/ggml-sycl/* ./ggml/src/ggml-sycl/
|
||||
cp -rpv ../ggml/src/ggml-sycl.cpp ./ggml/src/ggml-sycl.cpp
|
||||
cp -rpv ../ggml/src/ggml-vulkan.cpp ./ggml/src/ggml-vulkan.cpp
|
||||
cp -rpv ../ggml/src/vulkan-shaders/* ./ggml/src/vulkan-shaders/
|
||||
cp -rpv ../ggml/src/ggml*.c ./ggml/src/
|
||||
cp -rpv ../ggml/src/ggml*.cpp ./ggml/src/
|
||||
cp -rpv ../ggml/src/ggml*.h ./ggml/src/
|
||||
cp -rpv ../ggml/src/ggml*.cu ./ggml/src/
|
||||
cp -rpv ../ggml/src/ggml*.m ./ggml/src/
|
||||
cp -rpv ../ggml/src/ggml-amx/* ./ggml/src/ggml-amx/
|
||||
cp -rpv ../ggml/src/ggml-cann/* ./ggml/src/ggml-cann/
|
||||
cp -rpv ../ggml/src/ggml-cuda/* ./ggml/src/ggml-cuda/
|
||||
cp -rpv ../ggml/src/ggml-sycl/* ./ggml/src/ggml-sycl/
|
||||
cp -rpv ../ggml/src/vulkan-shaders/* ./ggml/src/vulkan-shaders/
|
||||
|
||||
cp -rpv ../ggml/include/ggml.h ./ggml/include/ggml.h
|
||||
cp -rpv ../ggml/include/ggml-alloc.h ./ggml/include/ggml-alloc.h
|
||||
cp -rpv ../ggml/include/ggml-amx.h ./ggml/include/ggml-amx.h
|
||||
cp -rpv ../ggml/include/ggml-backend.h ./ggml/include/ggml-backend.h
|
||||
cp -rpv ../ggml/include/ggml-blas.h ./ggml/include/ggml-blas.h
|
||||
cp -rpv ../ggml/include/ggml-cann.h ./ggml/include/ggml-cann.h
|
||||
cp -rpv ../ggml/include/ggml-cuda.h ./ggml/include/ggml-cuda.h
|
||||
cp -rpv ../ggml/include/ggml-kompute.h ./ggml/include/ggml-kompute.h
|
||||
cp -rpv ../ggml/include/ggml-metal.h ./ggml/include/ggml-metal.h
|
||||
cp -rpv ../ggml/include/ggml-rpc.h ./ggml/include/ggml-rpc.h
|
||||
cp -rpv ../ggml/include/ggml-sycl.h ./ggml/include/ggml-sycl.h
|
||||
cp -rpv ../ggml/include/ggml-vulkan.h ./ggml/include/ggml-vulkan.h
|
||||
cp -rpv ../ggml/include/ggml*.h ./ggml/include/
|
||||
|
||||
cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
|
||||
cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
|
||||
|
@ -1876,8 +1876,11 @@ static void llama_sampler_dry_reset(struct llama_sampler * smpl) {
|
||||
static struct llama_sampler * llama_sampler_dry_clone(const struct llama_sampler * smpl) {
|
||||
const auto * ctx = (llama_sampler_dry *) smpl->ctx;
|
||||
|
||||
// nullptr is passed as vocab because it is only needed for raw sequence breaker processing, which we have already done and will be copying
|
||||
auto * result = llama_sampler_init_dry(nullptr, ctx->dry_multiplier, ctx->dry_base, ctx->dry_allowed_length, ctx->dry_penalty_last_n, NULL, 0);
|
||||
llama_vocab dummy_vocab;
|
||||
|
||||
// dummy vocab is passed because it is only needed for raw sequence breaker processing, which we have already done and will simply be copying
|
||||
auto * result = llama_sampler_init_dry_impl(dummy_vocab, ctx->total_context_size, ctx->dry_multiplier, ctx->dry_base, ctx->dry_allowed_length, ctx->dry_penalty_last_n, NULL, 0);
|
||||
|
||||
// Copy the state, including the processed breakers
|
||||
{
|
||||
auto * result_ctx = (llama_sampler_dry *) result->ctx;
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { readFileSync } from "fs"
|
||||
import { SchemaConverter } from "../examples/server/public/json-schema-to-grammar.mjs"
|
||||
import { SchemaConverter } from "../examples/server/public_legacy/json-schema-to-grammar.mjs"
|
||||
|
||||
const [, , file] = process.argv
|
||||
const url = `file://${file}`
|
||||
|
Loading…
Reference in New Issue
Block a user