mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-15 07:19:53 +00:00
use reference quantization fns in AMX until moved to CPU backend
ggml-ci
This commit is contained in:
parent
5cfaecd34c
commit
dddf3771c2
@@ -433,4 +433,8 @@ void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
|
|||||||
GGML_UNUSED(n_threads);
|
GGML_UNUSED(n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ggml_backend_reg_t ggml_backend_amx_reg(void) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -496,19 +496,20 @@ inline void from_float(const float * x, char * vy, int64_t k);
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline void from_float<block_q8_0>(const float * x, char * vy, int64_t k) {
|
inline void from_float<block_q8_0>(const float * x, char * vy, int64_t k) {
|
||||||
quantize_row_q8_0(x, vy, k);
|
// FIXME: using unoptimized reference impl until moved to CPU backend
|
||||||
|
quantize_row_q8_0_ref(x, (block_q8_0 *)vy, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline void from_float<block_q8_1>(const float * x, char * vy, int64_t k) {
|
inline void from_float<block_q8_1>(const float * x, char * vy, int64_t k) {
|
||||||
quantize_row_q8_1(x, vy, k);
|
quantize_row_q8_1_ref(x, (block_q8_1 *)vy, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline void from_float<block_q8_K>(const float * x, char * vy, int64_t k) {
|
inline void from_float<block_q8_K>(const float * x, char * vy, int64_t k) {
|
||||||
#if 1
|
#if 1
|
||||||
// TODO: this is reference impl!
|
// TODO: this is reference impl!
|
||||||
quantize_row_q8_K(x, vy, k);
|
quantize_row_q8_K_ref(x, (block_q8_K *)vy, k);
|
||||||
#else
|
#else
|
||||||
quantize_row_q8_K_vnni(x, vy, k);
|
quantize_row_q8_K_vnni(x, vy, k);
|
||||||
#endif
|
#endif
|
||||||
|
@@ -31,10 +31,6 @@
|
|||||||
#include "ggml-rpc.h"
|
#include "ggml-rpc.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __AMX_INT8__
|
|
||||||
#undef GGML_USE_AMX
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef GGML_USE_AMX
|
#ifdef GGML_USE_AMX
|
||||||
# include "ggml-amx.h"
|
# include "ggml-amx.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -84,6 +80,10 @@ struct ggml_backend_registry {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void register_backend(ggml_backend_reg_t reg) {
|
void register_backend(ggml_backend_reg_t reg) {
|
||||||
|
if (!reg) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
||||||
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
||||||
|
Loading…
Reference in New Issue
Block a user