cuda : allocate all temporary ggml_tensor_extra_gpu from a fixed-size buffer (#2220)

Bach Le 2023-07-15 03:00:58 +08:00 committed by GitHub
parent e8035f141e
commit 7cdd30bf1f

ggml-cuda.cu

@@ -3646,6 +3646,22 @@ void ggml_cuda_free_data(struct ggml_tensor * tensor) {
     delete extra;
 }
 
+static struct ggml_tensor_extra_gpu * g_temp_tensor_extras = nullptr;
+static size_t g_temp_tensor_extra_index = 0;
+
+static struct ggml_tensor_extra_gpu * ggml_cuda_alloc_temp_tensor_extra() {
+    if (g_temp_tensor_extras == nullptr) {
+        g_temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_MAX_NODES];
+    }
+
+    size_t alloc_index = g_temp_tensor_extra_index;
+    g_temp_tensor_extra_index = (g_temp_tensor_extra_index + 1) % GGML_MAX_NODES;
+    struct ggml_tensor_extra_gpu * extra = &g_temp_tensor_extras[alloc_index];
+    memset(extra, 0, sizeof(*extra));
+
+    return extra;
+}
+
 void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bool force_inplace) {
     if (scratch && g_scratch_size == 0) {
         return;
@@ -3663,8 +3679,7 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
     }
 
     tensor->backend = GGML_BACKEND_GPU;
-    struct ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
-    memset(extra, 0, sizeof(*extra));
+    struct ggml_tensor_extra_gpu * extra;
 
     const bool inplace = (tensor->src[0] != nullptr && tensor->src[0]->data == tensor->data) ||
         tensor->op == GGML_OP_VIEW ||
@@ -3679,10 +3694,12 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
         if (tensor->op == GGML_OP_VIEW) {
             memcpy(&offset, tensor->src[2]->data, sizeof(size_t));
         }
+        extra = ggml_cuda_alloc_temp_tensor_extra();
         extra->data_device[g_main_device] = src0_ddc + offset;
     } else if (tensor->op == GGML_OP_CPY) {
         struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src[1]->extra;
         void * src1_ddv = src1_extra->data_device[g_main_device];
+        extra = ggml_cuda_alloc_temp_tensor_extra();
         extra->data_device[g_main_device] = src1_ddv;
     } else if (scratch) {
         GGML_ASSERT(size <= g_scratch_size);
@@ -3695,6 +3712,7 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
             CUDA_CHECK(cudaMalloc(&data, g_scratch_size));
             g_scratch_buffer = data;
         }
+        extra = ggml_cuda_alloc_temp_tensor_extra();
         extra->data_device[g_main_device] = data + g_scratch_offset;
         g_scratch_offset += size;
@@ -3704,6 +3722,8 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
         void * data;
         CUDA_CHECK(cudaMalloc(&data, size));
         CUDA_CHECK(cudaMemset(data, 0, size));
+        extra = new ggml_tensor_extra_gpu;
+        memset(extra, 0, sizeof(*extra));
         extra->data_device[g_main_device] = data;
     }
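
What the change does: previously every call to ggml_cuda_assign_buffers_impl heap-allocated a fresh ggml_tensor_extra_gpu with new, including for views, copies, and scratch tensors whose extras are only needed while the current graph is alive; those temporaries now come from a single fixed-size array of GGML_MAX_NODES slots handed out round-robin. Below is a minimal standalone sketch of that pattern, with illustrative names (PoolEntry, kMaxEntries, alloc_temp_entry) standing in for the ggml ones; it is a sketch of the idea under those assumptions, not code from the project.

// Round-robin pool for short-lived per-node metadata, modeled on
// ggml_cuda_alloc_temp_tensor_extra() above. All names are illustrative.
#include <cstddef>
#include <cstdio>
#include <cstring>

struct PoolEntry {          // stands in for ggml_tensor_extra_gpu
    void * data_device[1];  // per-device data pointer, as in the real struct
};

static const size_t kMaxEntries  = 4096;     // plays the role of GGML_MAX_NODES
static PoolEntry *  g_pool       = nullptr;  // allocated once, reused forever
static size_t       g_pool_index = 0;        // next slot; wraps around

// Hand out a zeroed slot from the fixed-size pool. A slot stays valid only
// until the index wraps (kMaxEntries later calls), which is acceptable for
// metadata that lives no longer than one graph evaluation.
static PoolEntry * alloc_temp_entry() {
    if (g_pool == nullptr) {
        g_pool = new PoolEntry[kMaxEntries];  // lazy one-time allocation
    }
    size_t alloc_index = g_pool_index;
    g_pool_index = (g_pool_index + 1) % kMaxEntries;
    PoolEntry * entry = &g_pool[alloc_index];
    memset(entry, 0, sizeof(*entry));  // same zeroing the old new+memset did
    return entry;
}

int main() {
    PoolEntry * a = alloc_temp_entry();
    PoolEntry * b = alloc_temp_entry();
    printf("distinct slots: %s\n", a != b ? "yes" : "no");  // prints "yes"
    return 0;
}

Note the trade-off visible in the last hunk: a tensor that owns freshly cudaMalloc'd device memory still gets a heap-allocated extra, because ggml_cuda_free_data later deletes that extra along with the device buffer; only extras that never reach ggml_cuda_free_data are safe to take from the recycled pool.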