mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 03:14:35 +00:00
metal : add debug capture backend function (ggml/694)
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
8e14e3ddb3
commit
5f14ee0b0c
@ -57,6 +57,9 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(voi
|
|||||||
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||||
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
||||||
|
|
||||||
|
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
|
||||||
|
GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
40
ggml-metal.m
40
ggml-metal.m
@ -168,6 +168,8 @@ struct ggml_metal_context {
|
|||||||
|
|
||||||
bool support_simdgroup_reduction;
|
bool support_simdgroup_reduction;
|
||||||
bool support_simdgroup_mm;
|
bool support_simdgroup_mm;
|
||||||
|
|
||||||
|
bool should_capture_next_compute;
|
||||||
};
|
};
|
||||||
|
|
||||||
// MSL code
|
// MSL code
|
||||||
@ -687,6 +689,20 @@ static bool ggml_metal_graph_compute(
|
|||||||
const int n_cb = ctx->n_cb;
|
const int n_cb = ctx->n_cb;
|
||||||
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
|
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
|
||||||
|
|
||||||
|
const bool should_capture = ctx->should_capture_next_compute;
|
||||||
|
if (should_capture) {
|
||||||
|
ctx->should_capture_next_compute = false;
|
||||||
|
|
||||||
|
MTLCaptureDescriptor * descriptor = [MTLCaptureDescriptor new];
|
||||||
|
descriptor.captureObject = ctx->queue;
|
||||||
|
|
||||||
|
NSError * error = nil;
|
||||||
|
if (![[MTLCaptureManager sharedCaptureManager] startCaptureWithDescriptor:descriptor error:&error]) {
|
||||||
|
GGML_METAL_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]);
|
||||||
|
GGML_ASSERT(!"capture failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
id<MTLCommandBuffer> command_buffer_builder[n_cb];
|
id<MTLCommandBuffer> command_buffer_builder[n_cb];
|
||||||
for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) {
|
for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) {
|
||||||
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBufferWithUnretainedReferences];
|
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBufferWithUnretainedReferences];
|
||||||
@ -695,6 +711,7 @@ static bool ggml_metal_graph_compute(
|
|||||||
// enqueue the command buffers in order to specify their execution order
|
// enqueue the command buffers in order to specify their execution order
|
||||||
[command_buffer enqueue];
|
[command_buffer enqueue];
|
||||||
}
|
}
|
||||||
|
|
||||||
const id<MTLCommandBuffer> *command_buffers = command_buffer_builder;
|
const id<MTLCommandBuffer> *command_buffers = command_buffer_builder;
|
||||||
|
|
||||||
dispatch_apply(n_cb, ctx->d_queue, ^(size_t iter) {
|
dispatch_apply(n_cb, ctx->d_queue, ^(size_t iter) {
|
||||||
@ -741,9 +758,9 @@ static bool ggml_metal_graph_compute(
|
|||||||
GGML_ASSERT(!"unsupported op");
|
GGML_ASSERT(!"unsupported op");
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef GGML_METAL_NDEBUG
|
if (should_capture) {
|
||||||
[encoder pushDebugGroup:[NSString stringWithCString:ggml_op_desc(dst) encoding:NSUTF8StringEncoding]];
|
[encoder pushDebugGroup:[NSString stringWithCString:ggml_op_desc(dst) encoding:NSUTF8StringEncoding]];
|
||||||
#endif
|
}
|
||||||
|
|
||||||
const int64_t ne00 = src0 ? src0->ne[0] : 0;
|
const int64_t ne00 = src0 ? src0->ne[0] : 0;
|
||||||
const int64_t ne01 = src0 ? src0->ne[1] : 0;
|
const int64_t ne01 = src0 ? src0->ne[1] : 0;
|
||||||
@ -2218,9 +2235,9 @@ static bool ggml_metal_graph_compute(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef GGML_METAL_NDEBUG
|
if (should_capture) {
|
||||||
[encoder popDebugGroup];
|
[encoder popDebugGroup];
|
||||||
#endif
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[encoder endEncoding];
|
[encoder endEncoding];
|
||||||
@ -2242,6 +2259,10 @@ static bool ggml_metal_graph_compute(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (should_capture) {
|
||||||
|
[[MTLCaptureManager sharedCaptureManager] stopCapture];
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2613,6 +2634,13 @@ bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family) {
|
|||||||
return [ctx->device supportsFamily:(MTLGPUFamilyApple1 + family - 1)];
|
return [ctx->device supportsFamily:(MTLGPUFamilyApple1 + family - 1)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) {
|
||||||
|
GGML_ASSERT(ggml_backend_is_metal(backend));
|
||||||
|
|
||||||
|
struct ggml_metal_context * ctx = (struct ggml_metal_context *)backend->context;
|
||||||
|
ctx->should_capture_next_compute = true;
|
||||||
|
}
|
||||||
|
|
||||||
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data); // silence warning
|
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data); // silence warning
|
||||||
|
|
||||||
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data) {
|
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data) {
|
||||||
|
Loading…
Reference in New Issue
Block a user