common : fix duplicated file name with hf_repo and hf_file (#10550)

Xuan Son Nguyen 2024-11-27 22:30:52 +01:00 committed by GitHub
parent 3ad5451f3b
commit 9f912511bc
4 changed files with 43 additions and 31 deletions

@@ -128,7 +128,11 @@ static void common_params_handle_model_default(common_params & params) {
             }
             params.hf_file = params.model;
         } else if (params.model.empty()) {
-            params.model = fs_get_cache_file(string_split<std::string>(params.hf_file, '/').back());
+            // this is to avoid different repo having same file name, or same file name in different subdirs
+            std::string filename = params.hf_repo + "_" + params.hf_file;
+            // to make sure we don't have any slashes in the filename
+            string_replace_all(filename, "/", "_");
+            params.model = fs_get_cache_file(filename);
         }
     } else if (!params.model_url.empty()) {
         if (params.model.empty()) {
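For reference, a minimal standalone sketch of the resulting cache file name; the replace_all helper below only approximates string_replace_all, the repo/file pair is borrowed from the server test preset touched later in this commit, and fs_get_cache_file would additionally prepend the local cache directory:

    #include <iostream>
    #include <string>

    // rough stand-in for common's string_replace_all, for this sketch only
    static void replace_all(std::string & s, const std::string & from, const std::string & to) {
        for (size_t pos = 0; (pos = s.find(from, pos)) != std::string::npos; pos += to.size()) {
            s.replace(pos, from.size(), to);
        }
    }

    int main() {
        const std::string hf_repo = "ggml-org/models";
        const std::string hf_file = "jina-reranker-v1-tiny-en/ggml-model-f16.gguf";

        // prefixing the repo avoids collisions between identically named files
        // in different repos or subdirectories
        std::string filename = hf_repo + "_" + hf_file;
        replace_all(filename, "/", "_");

        // prints: ggml-org_models_jina-reranker-v1-tiny-en_ggml-model-f16.gguf
        std::cout << filename << "\n";
        return 0;
    }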

@@ -829,9 +829,9 @@ struct common_init_result common_init_from_params(common_params & params) {
     llama_model * model = nullptr;
 
     if (!params.hf_repo.empty() && !params.hf_file.empty()) {
-        model = common_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
     } else if (!params.model_url.empty()) {
-        model = common_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
     } else {
         model = llama_load_model_from_file(params.model.c_str(), mparams);
     }
@@ -1342,17 +1342,17 @@ static bool common_download_file(const std::string & url, const std::string & pa
 }
 
 struct llama_model * common_load_model_from_url(
-        const char * model_url,
-        const char * path_model,
-        const char * hf_token,
+        const std::string & model_url,
+        const std::string & local_path,
+        const std::string & hf_token,
         const struct llama_model_params & params) {
     // Basic validation of the model_url
-    if (!model_url || strlen(model_url) == 0) {
+    if (model_url.empty()) {
         LOG_ERR("%s: invalid model_url\n", __func__);
         return NULL;
     }
 
-    if (!common_download_file(model_url, path_model, hf_token)) {
+    if (!common_download_file(model_url, local_path, hf_token)) {
         return NULL;
     }
@@ -1363,9 +1363,9 @@ struct llama_model * common_load_model_from_url(
         /*.no_alloc = */ true,
         /*.ctx      = */ NULL,
     };
-    auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
+    auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
 
     if (!ctx_gguf) {
-        LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, path_model);
+        LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str());
         return NULL;
     }
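As an aside, a rough sketch of the metadata-only GGUF open used above to discover how many split files make up the model; the "split.count" key name and u16 value type follow the usual GGUF split convention and are assumptions here, not something this diff shows:

    #include "ggml.h"   // gguf_* API

    // returns the split count stored in a GGUF file's metadata, 1 if the key is absent,
    // or -1 if the file cannot be parsed
    static int read_split_count(const char * path) {
        struct gguf_init_params gguf_params = {
            /*.no_alloc = */ true,   // parse metadata only, do not load tensor data
            /*.ctx      = */ NULL,
        };
        struct gguf_context * ctx_gguf = gguf_init_from_file(path, gguf_params);
        if (!ctx_gguf) {
            return -1;
        }
        int n_split = 1;
        const int key = gguf_find_key(ctx_gguf, "split.count"); // assumed key name
        if (key >= 0) {
            n_split = gguf_get_val_u16(ctx_gguf, key);
        }
        gguf_free(ctx_gguf);
        return n_split;
    }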
@@ -1384,13 +1384,13 @@ struct llama_model * common_load_model_from_url(
         // Verify the first split file format
         // and extract split URL and PATH prefixes
         {
-            if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
-                LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, path_model, n_split);
+            if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) {
+                LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split);
                 return NULL;
             }
 
-            if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
-                LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url, n_split);
+            if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) {
+                LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split);
                 return NULL;
             }
         }
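A rough illustration of what the prefix checks above do, assuming the usual "<prefix>-%05d-of-%05d.gguf" split naming convention (the file name is made up for the example):

    #include <cstdio>
    #include "llama.h"

    int main() {
        char split_prefix[1024] = {0};
        // for "my-model-00001-of-00003.gguf" with split 0 of 3 this is expected to
        // extract "my-model"; a name that breaks the convention makes the call return 0
        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), "my-model-00001-of-00003.gguf", 0, 3)) {
            printf("unexpected model file name\n");
            return 1;
        }
        printf("split prefix: %s\n", split_prefix);
        return 0;
    }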
@@ -1417,14 +1417,14 @@ struct llama_model * common_load_model_from_url(
         }
     }
 
-    return llama_load_model_from_file(path_model, params);
+    return llama_load_model_from_file(local_path.c_str(), params);
 }
 
 struct llama_model * common_load_model_from_hf(
-        const char * repo,
-        const char * model,
-        const char * path_model,
-        const char * hf_token,
+        const std::string & repo,
+        const std::string & remote_path,
+        const std::string & local_path,
+        const std::string & hf_token,
         const struct llama_model_params & params) {
     // construct hugging face model url:
     //
@@ -1438,27 +1438,27 @@ struct llama_model * common_load_model_from_hf(
     std::string model_url = "https://huggingface.co/";
     model_url += repo;
     model_url += "/resolve/main/";
-    model_url += model;
+    model_url += remote_path;
 
-    return common_load_model_from_url(model_url.c_str(), path_model, hf_token, params);
+    return common_load_model_from_url(model_url, local_path, hf_token, params);
 }
 
 #else
 
 struct llama_model * common_load_model_from_url(
-        const char * /*model_url*/,
-        const char * /*path_model*/,
-        const char * /*hf_token*/,
+        const std::string & /*model_url*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
         const struct llama_model_params & /*params*/) {
     LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
     return nullptr;
 }
 
 struct llama_model * common_load_model_from_hf(
-        const char * /*repo*/,
-        const char * /*model*/,
-        const char * /*path_model*/,
-        const char * /*hf_token*/,
+        const std::string & /*repo*/,
+        const std::string & /*remote_path*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
         const struct llama_model_params & /*params*/) {
     LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
     return nullptr;
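For concreteness, the resolve URL constructed above, using the repo/file pair from the server test preset in this commit, comes out as follows (illustrative snippet only):

    std::string repo        = "ggml-org/models";
    std::string remote_path = "jina-reranker-v1-tiny-en/ggml-model-f16.gguf";

    std::string model_url = "https://huggingface.co/" + repo + "/resolve/main/" + remote_path;
    // -> https://huggingface.co/ggml-org/models/resolve/main/jina-reranker-v1-tiny-en/ggml-model-f16.gguf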

@@ -470,8 +470,17 @@ struct llama_model_params common_model_params_to_llama(common_params & params);
 struct llama_context_params common_context_params_to_llama(const common_params & params);
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
 
-struct llama_model * common_load_model_from_url(const char * model_url, const char * path_model, const char * hf_token, const struct llama_model_params & params);
-struct llama_model * common_load_model_from_hf(const char * repo, const char * file, const char * path_model, const char * hf_token, const struct llama_model_params & params);
+struct llama_model * common_load_model_from_url(
+    const std::string & model_url,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);
+struct llama_model * common_load_model_from_hf(
+    const std::string & repo,
+    const std::string & remote_path,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);
 
 // clear LoRA adapters from context, then apply new list of adapters
 void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters);
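With the new declarations, callers pass std::string (or string literals) directly instead of c_str() pointers. A minimal usage sketch, where the local path and the empty token are illustrative assumptions:

    #include "common.h"
    #include "llama.h"

    int main() {
        llama_model_params mparams = llama_model_default_params();

        // downloads from Hugging Face (or reuses the cached copy), then loads the model
        llama_model * model = common_load_model_from_hf(
            /*repo        =*/ "ggml-org/models",
            /*remote_path =*/ "jina-reranker-v1-tiny-en/ggml-model-f16.gguf",
            /*local_path  =*/ "/tmp/ggml-model-f16.gguf",
            /*hf_token    =*/ "",
            mparams);
        if (!model) {
            return 1;
        }

        llama_free_model(model);
        return 0;
    }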

@@ -319,7 +319,6 @@ class ServerPreset:
         server.model_hf_repo = "ggml-org/models"
         server.model_hf_file = "jina-reranker-v1-tiny-en/ggml-model-f16.gguf"
         server.model_alias = "jina-reranker"
-        server.model_file = "./tmp/jina-reranker-v1-tiny-en.gguf"
         server.n_ctx = 512
         server.n_batch = 512
         server.n_slots = 1