Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-03 15:24:35 +00:00)

Commit 740e7cb6e5 (parent bc40adb1fa)

This commit replaces the compile-time GGML_USE_RPC guards with a runtime check, llama_supports_rpc(), and adds that function to the public API.
@@ -1353,15 +1353,15 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.image.emplace_back(value);
         }
     ).set_examples({LLAMA_EXAMPLE_LLAVA}));
-#ifdef GGML_USE_RPC
-    add_opt(llama_arg(
-        {"--rpc"}, "SERVERS",
-        "comma separated list of RPC servers",
-        [](gpt_params & params, const std::string & value) {
-            params.rpc_servers = value;
-        }
-    ).set_env("LLAMA_ARG_RPC"));
-#endif
+    if (llama_supports_rpc()) {
+        add_opt(llama_arg(
+            {"--rpc"}, "SERVERS",
+            "comma separated list of RPC servers",
+            [](gpt_params & params, const std::string & value) {
+                params.rpc_servers = value;
+            }
+        ).set_env("LLAMA_ARG_RPC"));
+    }
     add_opt(llama_arg(
         {"--mlock"},
         "force system to keep model in RAM rather than swapping or compressing",
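The hunk above swaps a compile-time #ifdef GGML_USE_RPC guard for the runtime llama_supports_rpc() check, so the --rpc option (and its LLAMA_ARG_RPC environment variable) is registered only when the RPC backend is actually available. A minimal standalone sketch of that gating pattern, with a hypothetical supports_rpc() placeholder standing in for llama_supports_rpc():

// Standalone sketch, not the llama.cpp parser: supports_rpc() is a placeholder
// for llama_supports_rpc(); the point is that the option is registered at
// runtime instead of being compiled out.
#include <functional>
#include <iostream>
#include <map>
#include <string>

static bool supports_rpc() { return false; }  // placeholder capability check

int main() {
    std::map<std::string, std::function<void(const std::string &)>> options;

    // always-available option
    options["--mlock"] = [](const std::string &) { std::cout << "mlock enabled\n"; };

    // registered only if the capability is present in this build/run
    if (supports_rpc()) {
        options["--rpc"] = [](const std::string & servers) {
            std::cout << "RPC servers: " << servers << "\n";
        };
    }

    std::cout << "--rpc recognized: " << (options.count("--rpc") ? "yes" : "no") << "\n";
    return 0;
}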
@@ -304,9 +304,9 @@ static void print_usage(int /* argc */, char ** argv) {
     printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
     printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
     printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
-#ifdef GGML_USE_RPC
-    printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
-#endif
+    if (llama_supports_rpc()) {
+        printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
+    }
     printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
     printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
     printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
@@ -497,14 +497,12 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             }
             auto p = string_split<int>(argv[i], split_delim);
             params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end());
-#ifdef GGML_USE_RPC
-        } else if (arg == "-rpc" || arg == "--rpc") {
+        } else if (llama_supports_rpc() && (arg == "-rpc" || arg == "--rpc")) {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.rpc_servers.push_back(argv[i]);
-#endif
         } else if (arg == "-sm" || arg == "--split-mode") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -433,6 +433,7 @@ extern "C" {
     LLAMA_API bool llama_supports_mmap       (void);
     LLAMA_API bool llama_supports_mlock      (void);
     LLAMA_API bool llama_supports_gpu_offload(void);
+    LLAMA_API bool llama_supports_rpc        (void);

     LLAMA_API uint32_t llama_n_ctx      (const struct llama_context * ctx);
     LLAMA_API uint32_t llama_n_batch    (const struct llama_context * ctx);
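With the declaration above, RPC support can be queried at runtime alongside the existing capability functions. A minimal usage sketch, assuming the public header llama.h and linking against libllama:

// Sketch of querying the capability API from application code.
#include <cstdio>
#include "llama.h"

int main() {
    llama_backend_init();

    std::printf("mmap:        %d\n", llama_supports_mmap());
    std::printf("mlock:       %d\n", llama_supports_mlock());
    std::printf("gpu offload: %d\n", llama_supports_gpu_offload());
    std::printf("rpc:         %d\n", llama_supports_rpc());  // added by this commit

    llama_backend_free();
    return 0;
}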
@@ -18996,10 +18996,14 @@ bool llama_supports_gpu_offload(void) {
 #else
     return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
            ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) != nullptr ||
-           ggml_backend_reg_by_name("RPC") != nullptr;
+           llama_supports_rpc();
 #endif
 }

+bool llama_supports_rpc(void) {
+    return ggml_backend_reg_by_name("RPC") != nullptr;
+}
+
 void llama_backend_init(void) {
     ggml_time_init();

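As implemented above, llama_supports_rpc() is a lookup in the ggml backend registry: it reports whether a backend named "RPC" was registered, rather than whether GGML_USE_RPC was defined at compile time. A sketch of the same lookup done directly against the registry, assuming the public header ggml-backend.h:

// Sketch of the registry lookup that backs llama_supports_rpc(); assumes the
// backend name "RPC" matches the name used at registration.
#include <cstdio>
#include "ggml-backend.h"

static bool backend_registered(const char * name) {
    return ggml_backend_reg_by_name(name) != nullptr;
}

int main() {
    std::printf("RPC backend registered: %s\n", backend_registered("RPC") ? "yes" : "no");
    return 0;
}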