diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 754611bf3..b82167cbf 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -539,6 +539,8 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffe void * dev_ptr; cudaError_t err = cudaMalloc(&dev_ptr, size); if (err != cudaSuccess) { + // clear the error + cudaGetLastError(); GGML_CUDA_LOG_ERROR("%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, cudaGetErrorString(err)); return nullptr; }