#ifdef GGML_USE_CUDA #include "ggml-cuda.h" #endif #ifdef GGML_USE_METAL #include "ggml-metal.h" #endif #include "ggml-rpc.h" #include #include static ggml_backend_t create_backend() { ggml_backend_t backend = NULL; #ifdef GGML_USE_CUDA fprintf(stderr, "%s: using CUDA backend\n", __func__); backend = ggml_backend_cuda_init(0); // init device 0 if (!backend) { fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__); } #elif GGML_USE_METAL fprintf(stderr, "%s: using Metal backend\n", __func__); backend = ggml_backend_metal_init(); if (!backend) { fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); } #endif // if there aren't GPU Backends fallback to CPU backend if (!backend) { fprintf(stderr, "%s: using CPU backend\n", __func__); backend = ggml_backend_cpu_init(); } return backend; } static void get_backend_memory(size_t * free_mem, size_t * total_mem) { #ifdef GGML_USE_CUDA ggml_backend_cuda_get_device_memory(0, free_mem, total_mem); #else // TODO: implement for other backends *free_mem = 1; *total_mem = 1; #endif } int main(int argc, char * argv[]) { if (argc < 3) { fprintf(stderr, "Usage: %s \n", argv[0]); return 1; } const char * host = argv[1]; int port = std::stoi(argv[2]); if (port <= 0 || port > 65535) { fprintf(stderr, "Invalid port number: %d\n", port); return 1; } ggml_backend_t backend = create_backend(); if (!backend) { fprintf(stderr, "Failed to create backend\n"); return 1; } printf("Starting RPC server on %s:%d\n", host, port); size_t free_mem, total_mem; get_backend_memory(&free_mem, &total_mem); std::string endpoint = std::string(host) + ":" + std::to_string(port); start_rpc_server(backend, endpoint.c_str(), free_mem, total_mem); ggml_backend_free(backend); return 0; }