mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-05 16:24:34 +00:00
This commit is contained in:
parent
bc40adb1fa
commit
740e7cb6e5
@ -1353,15 +1353,15 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||||||
params.image.emplace_back(value);
|
params.image.emplace_back(value);
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_LLAVA}));
|
).set_examples({LLAMA_EXAMPLE_LLAVA}));
|
||||||
#ifdef GGML_USE_RPC
|
if (llama_supports_rpc()) {
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--rpc"}, "SERVERS",
|
{"--rpc"}, "SERVERS",
|
||||||
"comma separated list of RPC servers",
|
"comma separated list of RPC servers",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.rpc_servers = value;
|
params.rpc_servers = value;
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_RPC"));
|
).set_env("LLAMA_ARG_RPC"));
|
||||||
#endif
|
}
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--mlock"},
|
{"--mlock"},
|
||||||
"force system to keep model in RAM rather than swapping or compressing",
|
"force system to keep model in RAM rather than swapping or compressing",
|
||||||
|
@ -304,9 +304,9 @@ static void print_usage(int /* argc */, char ** argv) {
|
|||||||
printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
|
printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
|
||||||
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
|
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
|
||||||
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
||||||
#ifdef GGML_USE_RPC
|
if (llama_supports_rpc()) {
|
||||||
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
|
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
|
||||||
#endif
|
}
|
||||||
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
|
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
|
||||||
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
||||||
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
||||||
@ -497,14 +497,12 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
auto p = string_split<int>(argv[i], split_delim);
|
auto p = string_split<int>(argv[i], split_delim);
|
||||||
params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end());
|
params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end());
|
||||||
#ifdef GGML_USE_RPC
|
} else if (llama_supports_rpc() && (arg == "-rpc" || arg == "--rpc")) {
|
||||||
} else if (arg == "-rpc" || arg == "--rpc") {
|
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.rpc_servers.push_back(argv[i]);
|
params.rpc_servers.push_back(argv[i]);
|
||||||
#endif
|
|
||||||
} else if (arg == "-sm" || arg == "--split-mode") {
|
} else if (arg == "-sm" || arg == "--split-mode") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
|
@ -433,6 +433,7 @@ extern "C" {
|
|||||||
LLAMA_API bool llama_supports_mmap (void);
|
LLAMA_API bool llama_supports_mmap (void);
|
||||||
LLAMA_API bool llama_supports_mlock (void);
|
LLAMA_API bool llama_supports_mlock (void);
|
||||||
LLAMA_API bool llama_supports_gpu_offload(void);
|
LLAMA_API bool llama_supports_gpu_offload(void);
|
||||||
|
LLAMA_API bool llama_supports_rpc (void);
|
||||||
|
|
||||||
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
|
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
|
||||||
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
||||||
|
@ -18996,10 +18996,14 @@ bool llama_supports_gpu_offload(void) {
|
|||||||
#else
|
#else
|
||||||
return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
|
return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
|
||||||
ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) != nullptr ||
|
ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) != nullptr ||
|
||||||
ggml_backend_reg_by_name("RPC") != nullptr;
|
llama_supports_rpc();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool llama_supports_rpc(void) {
|
||||||
|
return ggml_backend_reg_by_name("RPC") != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
void llama_backend_init(void) {
|
void llama_backend_init(void) {
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user