diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index f2a4ed84f..684578f98 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -2064,7 +2064,7 @@ extern "C" { // ================================================================================================= // CPU-only API for ggml_cgraph // - // TODO: move as a separate backend + // TODO: move to the CPU backend // NOTE: avoid using, will be removed // diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c index 10739e0c7..4787a0bd1 100644 --- a/ggml/src/ggml-backend.c +++ b/ggml/src/ggml-backend.c @@ -751,10 +751,9 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cpu_get_default_buffer_ GGML_UNUSED(backend); } -// TODO: this struct should no longer be needed -// instead, the new ggml_graph_work_init() + ggml_graph_work_free() API should be enough to replace this -// for now, keeping the implementation as it is, to avoid making a mistake struct ggml_backend_plan_cpu { + // TODO: move the work buffer members from ggml_cgraph here when the public CPU-only API is removed + struct ggml_cgraph cgraph; }; diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 6539e8cb4..31a46ea3e 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -774,6 +774,8 @@ struct ggml_cgraph { enum ggml_cgraph_eval_order order; + // TODO: after the CPU-only API is removed, we can move the members below to ggml_backend_plan_cpu + bool work_own; size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()` uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`