From 1905ba1a225ebd42c9033d57223c5c35433fd0e2 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 24 Oct 2024 10:44:00 +0300 Subject: [PATCH] server : check that the prompt fits in the slot's context ggml-ci --- convert_hf_to_gguf.py | 3 +++ convert_hf_to_gguf_update.py | 1 + examples/server/server.cpp | 7 ++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7e552a71b..a34dabe23 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -573,6 +573,9 @@ class Model: if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f": # ref: https://huggingface.co/BAAI/bge-small-en-v1.5 res = "bert-bge" + if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7": + # ref: https://huggingface.co/BAAI/bge-large-zh-v1.5 + res = "bert-bge-large" if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166": # ref: https://huggingface.co/mosaicml/mpt-7b res = "mpt" diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 022354a3b..28cd02e5a 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -72,6 +72,7 @@ models = [ {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", }, {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", }, {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", }, + {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", }, {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", }, {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", }, {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", }, diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 3992108e7..2cb1a1bce 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2115,12 +2115,17 @@ struct server_context { } if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) { - // this prompt is too large to process - discard it if (slot.n_prompt_tokens > n_ubatch) { slot.release(); send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER); continue; } + + if (slot.n_prompt_tokens > slot.n_ctx) { + slot.release(); + send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_SERVER); + continue; + } } else { if (!params.ctx_shift) { // if context shift is disabled, we make sure prompt size is smaller than KV size