mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-03 23:34:35 +00:00
metal : disable concurrency optimization
This commit is contained in:
parent
f015b26689
commit
86c90e34f5
@@ -6605,8 +6605,8 @@ struct llama_context * llama_new_context_with_model(
|
|||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);
|
//ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);
|
||||||
ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
|
//ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
// measure memory requirements for the graph
|
// measure memory requirements for the graph
|
||||||
@@ -6621,7 +6621,7 @@ struct llama_context * llama_new_context_with_model(
|
|||||||
ctx->alloc = ggml_allocr_new(ctx->buf_alloc.data, ctx->buf_alloc.size, tensor_alignment);
|
ctx->alloc = ggml_allocr_new(ctx->buf_alloc.data, ctx->buf_alloc.size, tensor_alignment);
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
if (ctx->ctx_metal) {
|
if (ctx->ctx_metal) {
|
||||||
ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
|
//ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
|
Loading…
Reference in New Issue
Block a user