Mirror of https://github.com/ggerganov/llama.cpp.git
arg : bring back missing ifdef (#9411)
* arg : bring back missing ifdef
* replace with llama_supports_gpu_offload
This commit is contained in:
parent 8d300bd35f
commit 6cd4e03444
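The change replaces the compile-time #ifndef GGML_USE_CUDA_SYCL_VULKAN guards around the "has no effect" warnings with a runtime call to llama_supports_gpu_offload(), so the warnings now cover every build without GPU offload support rather than only non-CUDA/SYCL/Vulkan builds (see the hunks below). As a minimal sketch of the same runtime check from an application's point of view: only llama_supports_gpu_offload() (declared in llama.h) is the library API here; the option handling around it is an illustrative assumption, not code from this commit.

// Minimal sketch (not part of the commit): use the runtime capability check
// instead of a compile-time backend macro. Only llama_supports_gpu_offload()
// is the real llama.cpp API; the CLI handling is an illustrative assumption.
#include <cstdio>
#include <cstdlib>
#include "llama.h"

int main(int argc, char ** argv) {
    // hypothetical option: number of layers the user asked to offload
    const int n_gpu_layers = (argc > 1) ? std::atoi(argv[1]) : 0;

    if (n_gpu_layers > 0 && !llama_supports_gpu_offload()) {
        // same pattern as the patch: one runtime check instead of per-backend #ifdefs
        fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the number of GPU layers has no effect.\n");
    }
    return 0;
}
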
@@ -1417,20 +1417,18 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
                 params.split_mode = LLAMA_SPLIT_MODE_NONE;
             } else if (arg_next == "layer") {
                 params.split_mode = LLAMA_SPLIT_MODE_LAYER;
-            }
-            else if (arg_next == "row") {
+            } else if (arg_next == "row") {
 #ifdef GGML_USE_SYCL
                 fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
                 exit(1);
 #endif // GGML_USE_SYCL
                 params.split_mode = LLAMA_SPLIT_MODE_ROW;
-            }
-            else {
+            } else {
                 throw std::invalid_argument("invalid value");
             }
-#ifndef GGML_USE_CUDA_SYCL_VULKAN
-            fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL_VULKAN
+            if (!llama_supports_gpu_offload()) {
+                fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the split mode has no effect.\n");
+            }
         }
     ));
     add_opt(llama_arg(
@@ -1450,14 +1448,14 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             }
             for (size_t i = 0; i < llama_max_devices(); ++i) {
                 if (i < split_arg.size()) {
                     params.tensor_split[i] = std::stof(split_arg[i]);
                 } else {
                     params.tensor_split[i] = 0.0f;
                 }
             }
-#ifndef GGML_USE_CUDA_SYCL_VULKAN
-            fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting a tensor split has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL_VULKAN
+            if (!llama_supports_gpu_offload()) {
+                fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting a tensor split has no effect.\n");
+            }
         }
     ));
     add_opt(llama_arg(
@@ -1465,9 +1463,9 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
         format("the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: %d)", params.main_gpu),
         [](gpt_params & params, int value) {
             params.main_gpu = value;
-#ifndef GGML_USE_CUDA_SYCL_VULKAN
-            fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL_VULKAN
+            if (!llama_supports_gpu_offload()) {
+                fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the main GPU has no effect.\n");
+            }
         }
     ));
     add_opt(llama_arg(

@@ -56,14 +56,6 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL))
-#define GGML_USE_CUDA_SYCL
-#endif
-
-#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)) || defined(GGML_USE_VULKAN)
-#define GGML_USE_CUDA_SYCL_VULKAN
-#endif
-
 #if defined(LLAMA_USE_CURL)
 #ifdef __linux__
 #include <linux/limits.h>
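The last hunk (presumably the top of common/common.cpp) removes the GGML_USE_CUDA_SYCL and GGML_USE_CUDA_SYCL_VULKAN helper macros, which had to be kept in sync with the set of GPU backends by hand. With the runtime helper, that knowledge lives in the library itself. A rough sketch of how such a helper can be implemented is shown below; the exact backend macro list is an assumption for illustration, not the library's actual implementation.

// Rough sketch of a library-side capability check; the actual llama.cpp
// implementation and its exact list of backend macros may differ.
bool llama_supports_gpu_offload(void) {
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_VULKAN) || \
    defined(GGML_USE_METAL)
    return true;   // built with at least one GPU backend
#else
    return false;  // CPU-only build: GPU-related options have no effect
#endif
}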