Mirror of https://github.com/ggerganov/llama.cpp.git
fixup! Store layers in VRAM
commit 8a9d7ce624
parent 3ed4588e22
@@ -412,7 +412,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     if (llama_mmap_supported()) {
         fprintf(stderr, "  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
     }
-    fprintf(stderr, "  --gpu_layers          number of layers to store in VRAM");
+    fprintf(stderr, "  --gpu_layers          number of layers to store in VRAM\n");
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
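The fixup only adds the missing "\n" to the --gpu_layers usage line so it no longer runs into the --mtest line in the printed help text. For context, a minimal sketch of how such a flag might be consumed in an argument-parsing loop is shown below; the struct, function name, and the n_gpu_layers field are assumptions for illustration and are not taken from this commit.

    // Hedged sketch, not the actual llama.cpp parser: reads "--gpu_layers N"
    // into a hypothetical params struct.
    #include <string>

    struct gpt_params_sketch {
        int n_gpu_layers = 0; // number of layers to store in VRAM; 0 = CPU only (assumed default)
    };

    static bool parse_gpu_layers(int argc, char ** argv, gpt_params_sketch & params) {
        for (int i = 1; i < argc; i++) {
            std::string arg = argv[i];
            if (arg == "--gpu_layers") {
                if (++i >= argc) {
                    return false; // flag given without a value
                }
                params.n_gpu_layers = std::stoi(argv[i]);
            }
        }
        return true;
    }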