diff --git a/ggml/src/ggml-amx/ggml-amx.cpp b/ggml/src/ggml-amx/ggml-amx.cpp
index 144dc9d8a..2db0b720a 100644
--- a/ggml/src/ggml-amx/ggml-amx.cpp
+++ b/ggml/src/ggml-amx/ggml-amx.cpp
@@ -433,4 +433,8 @@ void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
     GGML_UNUSED(n_threads);
 }
 
+ggml_backend_reg_t ggml_backend_amx_reg(void) {
+    return nullptr;
+}
+
 #endif
diff --git a/ggml/src/ggml-amx/mmq.cpp b/ggml/src/ggml-amx/mmq.cpp
index 239d15121..529bee25b 100644
--- a/ggml/src/ggml-amx/mmq.cpp
+++ b/ggml/src/ggml-amx/mmq.cpp
@@ -496,19 +496,20 @@ inline void from_float(const float * x, char * vy, int64_t k);
 
 template <>
 inline void from_float<block_q8_0>(const float * x, char * vy, int64_t k) {
-    quantize_row_q8_0(x, vy, k);
+    // FIXME: using unoptimized reference impl until moved to CPU backend
+    quantize_row_q8_0_ref(x, (block_q8_0 *)vy, k);
 }
 
 template <>
 inline void from_float<block_q8_1>(const float * x, char * vy, int64_t k) {
-    quantize_row_q8_1(x, vy, k);
+    quantize_row_q8_1_ref(x, (block_q8_1 *)vy, k);
 }
 
 template <>
 inline void from_float<block_q8_K>(const float * x, char * vy, int64_t k) {
 #if 1
     // TODO: this is reference impl!
-    quantize_row_q8_K(x, vy, k);
+    quantize_row_q8_K_ref(x, (block_q8_K *)vy, k);
 #else
     quantize_row_q8_K_vnni(x, vy, k);
 #endif
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 6bd537d96..63e9d8201 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -31,10 +31,6 @@
 #include "ggml-rpc.h"
 #endif
 
-#ifndef __AMX_INT8__
-#undef GGML_USE_AMX
-#endif
-
 #ifdef GGML_USE_AMX
 # include "ggml-amx.h"
 #endif
@@ -84,6 +80,10 @@ struct ggml_backend_registry {
     }
 
     void register_backend(ggml_backend_reg_t reg) {
+        if (!reg) {
+            return;
+        }
+
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
             __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
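
Note (reviewer sketch, not part of the patch): register_backend() now tolerates a null ggml_backend_reg_t, so a backend compiled without hardware support can return nullptr from its reg function (as the stubbed ggml_backend_amx_reg() above does) and is simply skipped, replacing the previous compile-time guard (#ifndef __AMX_INT8__ / #undef GGML_USE_AMX) with a decision made by the backend itself. A minimal self-contained sketch of that null-guard pattern follows; the registry and backend names below are illustrative stand-ins, not ggml's actual API:

    #include <cstdio>
    #include <vector>

    struct backend_reg;                    // stand-in for ggml's opaque reg struct
    using backend_reg_t = backend_reg *;   // stand-in for ggml_backend_reg_t

    struct registry {
        std::vector<backend_reg_t> backends;

        void register_backend(backend_reg_t reg) {
            if (!reg) {
                return;                    // unsupported backend: skip, don't deref
            }
            backends.push_back(reg);
        }
    };

    // A backend built without its ISA (here, AMX) reports "not available"
    // by returning nullptr, mirroring the stubbed ggml_backend_amx_reg().
    backend_reg_t stub_reg(void) { return nullptr; }

    int main() {
        registry r;
        r.register_backend(stub_reg());    // no-op rather than a null dereference
        std::printf("registered backends: %zu\n", r.backends.size()); // prints 0
    }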