llama : add llama_supports_rpc API
Some checks failed
flake8 Lint / Lint (push) Has been cancelled

This commit is contained in:
slaren 2024-10-10 02:26:35 +02:00
parent bc40adb1fa
commit 740e7cb6e5
4 changed files with 19 additions and 16 deletions

View File

@ -1353,15 +1353,15 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
params.image.emplace_back(value); params.image.emplace_back(value);
} }
).set_examples({LLAMA_EXAMPLE_LLAVA})); ).set_examples({LLAMA_EXAMPLE_LLAVA}));
#ifdef GGML_USE_RPC if (llama_supports_rpc()) {
add_opt(llama_arg( add_opt(llama_arg(
{"--rpc"}, "SERVERS", {"--rpc"}, "SERVERS",
"comma separated list of RPC servers", "comma separated list of RPC servers",
[](gpt_params & params, const std::string & value) { [](gpt_params & params, const std::string & value) {
params.rpc_servers = value; params.rpc_servers = value;
} }
).set_env("LLAMA_ARG_RPC")); ).set_env("LLAMA_ARG_RPC"));
#endif }
add_opt(llama_arg( add_opt(llama_arg(
{"--mlock"}, {"--mlock"},
"force system to keep model in RAM rather than swapping or compressing", "force system to keep model in RAM rather than swapping or compressing",

View File

@ -304,9 +304,9 @@ static void print_usage(int /* argc */, char ** argv) {
printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str()); printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str()); printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str()); printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
#ifdef GGML_USE_RPC if (llama_supports_rpc()) {
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str()); printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
#endif }
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str()); printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str()); printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str()); printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
@ -497,14 +497,12 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
} }
auto p = string_split<int>(argv[i], split_delim); auto p = string_split<int>(argv[i], split_delim);
params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end()); params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end());
#ifdef GGML_USE_RPC } else if (llama_supports_rpc() && (arg == "-rpc" || arg == "--rpc")) {
} else if (arg == "-rpc" || arg == "--rpc") {
if (++i >= argc) { if (++i >= argc) {
invalid_param = true; invalid_param = true;
break; break;
} }
params.rpc_servers.push_back(argv[i]); params.rpc_servers.push_back(argv[i]);
#endif
} else if (arg == "-sm" || arg == "--split-mode") { } else if (arg == "-sm" || arg == "--split-mode") {
if (++i >= argc) { if (++i >= argc) {
invalid_param = true; invalid_param = true;

View File

@ -433,6 +433,7 @@ extern "C" {
LLAMA_API bool llama_supports_mmap (void); LLAMA_API bool llama_supports_mmap (void);
LLAMA_API bool llama_supports_mlock (void); LLAMA_API bool llama_supports_mlock (void);
LLAMA_API bool llama_supports_gpu_offload(void); LLAMA_API bool llama_supports_gpu_offload(void);
LLAMA_API bool llama_supports_rpc (void);
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);

View File

@ -18996,10 +18996,14 @@ bool llama_supports_gpu_offload(void) {
#else #else
return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr || return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) != nullptr || ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) != nullptr ||
ggml_backend_reg_by_name("RPC") != nullptr; llama_supports_rpc();
#endif #endif
} }
// Reports whether RPC is available by looking up a backend registry entry
// named "RPC" through ggml_backend_reg_by_name().
// Returns true when that lookup yields a non-null result, false otherwise.
bool llama_supports_rpc(void) {
    const bool rpc_backend_found = (ggml_backend_reg_by_name("RPC") != nullptr);
    return rpc_backend_found;
}
void llama_backend_init(void) { void llama_backend_init(void) {
ggml_time_init(); ggml_time_init();