metal : free metal objects (#5161)

* Releasing MTLFunction references after Metal pipeline construction

* Keeping the `ggml_metal_kernel` structure

* Spacing fix

* Whitespace fix
This commit is contained in:
Paul Tsochantaris 2024-01-28 19:50:16 +00:00 committed by GitHub
parent 35dec26cc2
commit d2f650cb5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -24,10 +24,7 @@
#define UNUSED(x) (void)(x) #define UNUSED(x) (void)(x)
#define GGML_METAL_MAX_KERNELS 256
struct ggml_metal_kernel { struct ggml_metal_kernel {
id<MTLFunction> function;
id<MTLComputePipelineState> pipeline; id<MTLComputePipelineState> pipeline;
}; };
@ -159,11 +156,10 @@ struct ggml_metal_context {
id<MTLDevice> device; id<MTLDevice> device;
id<MTLCommandQueue> queue; id<MTLCommandQueue> queue;
id<MTLLibrary> library;
dispatch_queue_t d_queue; dispatch_queue_t d_queue;
struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS]; struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];
bool support_simdgroup_reduction; bool support_simdgroup_reduction;
bool support_simdgroup_mm; bool support_simdgroup_mm;
@ -246,6 +242,8 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
ctx->queue = [ctx->device newCommandQueue]; ctx->queue = [ctx->device newCommandQueue];
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
id<MTLLibrary> metal_library;
// load library // load library
{ {
NSBundle * bundle = nil; NSBundle * bundle = nil;
@ -260,7 +258,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
// pre-compiled library found // pre-compiled library found
NSURL * libURL = [NSURL fileURLWithPath:libPath]; NSURL * libURL = [NSURL fileURLWithPath:libPath];
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]); GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
ctx->library = [ctx->device newLibraryWithURL:libURL error:&error]; metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
if (error) { if (error) {
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL; return NULL;
@ -302,7 +300,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
//[options setFastMathEnabled:false]; //[options setFastMathEnabled:false];
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error]; metal_library = [ctx->device newLibraryWithSource:src options:options error:&error];
if (error) { if (error) {
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL; return NULL;
@ -367,8 +365,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
{ {
NSError * error = nil; NSError * error = nil;
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) { for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
ctx->kernels[i].function = nil;
ctx->kernels[i].pipeline = nil; ctx->kernels[i].pipeline = nil;
} }
@ -380,10 +377,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
#define GGML_METAL_ADD_KERNEL(e, name, supported) \ #define GGML_METAL_ADD_KERNEL(e, name, supported) \
if (supported) { \ if (supported) { \
struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \ struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
kernel->function = [ctx->library newFunctionWithName:@"kernel_"#name]; \ id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:kernel->function error:&error]; \ kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
[metal_function release]; \
if (error) { \ if (error) { \
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \ GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
[metal_library release]; \
return NULL; \ return NULL; \
} \ } \
} else { \ } else { \
@ -512,23 +511,17 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
} }
[metal_library release];
return ctx; return ctx;
} }
static void ggml_metal_free(struct ggml_metal_context * ctx) { static void ggml_metal_free(struct ggml_metal_context * ctx) {
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__); GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) { for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
if (ctx->kernels[i].pipeline) {
[ctx->kernels[i].pipeline release]; [ctx->kernels[i].pipeline release];
} }
if (ctx->kernels[i].function) {
[ctx->kernels[i].function release];
}
}
[ctx->library release];
[ctx->queue release]; [ctx->queue release];
[ctx->device release]; [ctx->device release];