mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 03:44:35 +00:00
ggml : prefix lookup tables with ggml_
ggml-ci
This commit is contained in:
parent
1039a16ce2
commit
d70917f4b2
@ -207,7 +207,8 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
|||||||
#endif // __ARM_NEON
|
#endif // __ARM_NEON
|
||||||
|
|
||||||
// precomputed f32 table for f16 (256 KB)
|
// precomputed f32 table for f16 (256 KB)
|
||||||
extern float table_f32_f16[1 << 16];
|
// defined in ggml.c, initialized in ggml_init()
|
||||||
|
extern float ggml_table_f32_f16[1 << 16];
|
||||||
|
|
||||||
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
||||||
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
||||||
@ -217,7 +218,7 @@ extern float table_f32_f16[1 << 16];
|
|||||||
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
||||||
uint16_t s;
|
uint16_t s;
|
||||||
memcpy(&s, &f, sizeof(uint16_t));
|
memcpy(&s, &f, sizeof(uint16_t));
|
||||||
return table_f32_f16[s];
|
return ggml_table_f32_f16[s];
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
|
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
|
||||||
|
44
ggml.c
44
ggml.c
@ -232,19 +232,19 @@ typedef double ggml_float;
|
|||||||
//
|
//
|
||||||
|
|
||||||
// precomputed gelu table for f16 (128 KB)
|
// precomputed gelu table for f16 (128 KB)
|
||||||
static ggml_fp16_t table_gelu_f16[1 << 16];
|
static ggml_fp16_t ggml_table_gelu_f16[1 << 16];
|
||||||
|
|
||||||
// precomputed quick gelu table for f16 (128 KB)
|
// precomputed quick gelu table for f16 (128 KB)
|
||||||
static ggml_fp16_t table_gelu_quick_f16[1 << 16];
|
static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
|
||||||
|
|
||||||
// precomputed silu table for f16 (128 KB)
|
// precomputed silu table for f16 (128 KB)
|
||||||
static ggml_fp16_t table_silu_f16[1 << 16];
|
static ggml_fp16_t ggml_table_silu_f16[1 << 16];
|
||||||
|
|
||||||
// precomputed exp table for f16 (128 KB)
|
// precomputed exp table for f16 (128 KB)
|
||||||
static ggml_fp16_t table_exp_f16[1 << 16];
|
static ggml_fp16_t ggml_table_exp_f16[1 << 16];
|
||||||
|
|
||||||
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
||||||
float table_f32_f16[1 << 16];
|
float ggml_table_f32_f16[1 << 16];
|
||||||
|
|
||||||
// note: do not use these inside ggml.c
|
// note: do not use these inside ggml.c
|
||||||
// these are meant to be used via the ggml.h API
|
// these are meant to be used via the ggml.h API
|
||||||
@ -1363,7 +1363,7 @@ inline static float ggml_gelu_f32(float x) {
|
|||||||
inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
|
inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
|
||||||
const uint16_t * i16 = (const uint16_t *) x;
|
const uint16_t * i16 = (const uint16_t *) x;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
y[i] = table_gelu_f16[i16[i]];
|
y[i] = ggml_table_gelu_f16[i16[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1373,7 +1373,7 @@ inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
|
|||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
||||||
memcpy(&t, &fp16, sizeof(uint16_t));
|
memcpy(&t, &fp16, sizeof(uint16_t));
|
||||||
y[i] = GGML_FP16_TO_FP32(table_gelu_f16[t]);
|
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -1391,7 +1391,7 @@ inline static float ggml_gelu_quick_f32(float x) {
|
|||||||
//inline static void ggml_vec_gelu_quick_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
|
//inline static void ggml_vec_gelu_quick_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
|
||||||
// const uint16_t * i16 = (const uint16_t *) x;
|
// const uint16_t * i16 = (const uint16_t *) x;
|
||||||
// for (int i = 0; i < n; ++i) {
|
// for (int i = 0; i < n; ++i) {
|
||||||
// y[i] = table_gelu_quick_f16[i16[i]];
|
// y[i] = ggml_table_gelu_quick_f16[i16[i]];
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
|
|
||||||
@ -1401,7 +1401,7 @@ inline static void ggml_vec_gelu_quick_f32(const int n, float * y, const float *
|
|||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
||||||
memcpy(&t, &fp16, sizeof(uint16_t));
|
memcpy(&t, &fp16, sizeof(uint16_t));
|
||||||
y[i] = GGML_FP16_TO_FP32(table_gelu_quick_f16[t]);
|
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_quick_f16[t]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -1420,7 +1420,7 @@ inline static float ggml_silu_f32(float x) {
|
|||||||
//inline static void ggml_vec_silu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
|
//inline static void ggml_vec_silu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
|
||||||
// const uint16_t * i16 = (const uint16_t *) x;
|
// const uint16_t * i16 = (const uint16_t *) x;
|
||||||
// for (int i = 0; i < n; ++i) {
|
// for (int i = 0; i < n; ++i) {
|
||||||
// y[i] = table_silu_f16[i16[i]];
|
// y[i] = ggml_table_silu_f16[i16[i]];
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
|
|
||||||
@ -1430,7 +1430,7 @@ inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) {
|
|||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
||||||
memcpy(&t, &fp16, sizeof(uint16_t));
|
memcpy(&t, &fp16, sizeof(uint16_t));
|
||||||
y[i] = GGML_FP16_TO_FP32(table_silu_f16[t]);
|
y[i] = GGML_FP16_TO_FP32(ggml_table_silu_f16[t]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -2146,11 +2146,11 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||||||
for (int i = 0; i < (1 << 16); ++i) {
|
for (int i = 0; i < (1 << 16); ++i) {
|
||||||
uint16_t ui = i;
|
uint16_t ui = i;
|
||||||
memcpy(&ii, &ui, sizeof(ii));
|
memcpy(&ii, &ui, sizeof(ii));
|
||||||
const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
|
const float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
|
||||||
table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
|
ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
|
||||||
table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
||||||
table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f));
|
ggml_table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f));
|
||||||
table_exp_f16[i] = GGML_FP32_TO_FP16(expf(f));
|
ggml_table_exp_f16[i] = GGML_FP32_TO_FP16(expf(f));
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|
||||||
@ -10513,7 +10513,7 @@ static void ggml_compute_forward_soft_max_f32(
|
|||||||
// const float val = (sp[i] == -INFINITY) ? 0.0 : exp(sp[i] - max);
|
// const float val = (sp[i] == -INFINITY) ? 0.0 : exp(sp[i] - max);
|
||||||
ggml_fp16_t s = GGML_FP32_TO_FP16(sp[i] - max);
|
ggml_fp16_t s = GGML_FP32_TO_FP16(sp[i] - max);
|
||||||
memcpy(&scvt, &s, sizeof(scvt));
|
memcpy(&scvt, &s, sizeof(scvt));
|
||||||
const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
|
const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt]);
|
||||||
sum += (ggml_float)val;
|
sum += (ggml_float)val;
|
||||||
dp[i] = val;
|
dp[i] = val;
|
||||||
}
|
}
|
||||||
@ -12802,7 +12802,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|||||||
#else
|
#else
|
||||||
ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
|
ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
|
||||||
memcpy(&scvt[j], &s, sizeof(uint16_t));
|
memcpy(&scvt[j], &s, sizeof(uint16_t));
|
||||||
const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
|
const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt[j]]);
|
||||||
#endif
|
#endif
|
||||||
sump[j] += (ggml_float)val;
|
sump[j] += (ggml_float)val;
|
||||||
SS[j] = val;
|
SS[j] = val;
|
||||||
@ -13004,7 +13004,7 @@ static void ggml_compute_forward_flash_attn_f16(
|
|||||||
} else {
|
} else {
|
||||||
ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
|
ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
|
||||||
memcpy(&scvt[j], &s, sizeof(uint16_t));
|
memcpy(&scvt[j], &s, sizeof(uint16_t));
|
||||||
const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
|
const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt[j]]);
|
||||||
sump[j] += (ggml_float)val;
|
sump[j] += (ggml_float)val;
|
||||||
SS[j] = val;
|
SS[j] = val;
|
||||||
}
|
}
|
||||||
@ -13455,7 +13455,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|||||||
#else
|
#else
|
||||||
ggml_fp16_t s = GGML_FP32_TO_FP16(SR[j] - max);
|
ggml_fp16_t s = GGML_FP32_TO_FP16(SR[j] - max);
|
||||||
memcpy(&scvt[j], &s, sizeof(uint16_t));
|
memcpy(&scvt[j], &s, sizeof(uint16_t));
|
||||||
const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
|
const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt[j]]);
|
||||||
#endif
|
#endif
|
||||||
sump[j] += (ggml_float)val;
|
sump[j] += (ggml_float)val;
|
||||||
SW[j] = val;
|
SW[j] = val;
|
||||||
@ -14205,7 +14205,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
|
|||||||
#else
|
#else
|
||||||
ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
|
ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
|
||||||
memcpy(&scvt, &s, sizeof(scvt));
|
memcpy(&scvt, &s, sizeof(scvt));
|
||||||
const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
|
const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt]);
|
||||||
#endif
|
#endif
|
||||||
sum += (ggml_float)val;
|
sum += (ggml_float)val;
|
||||||
st[i] = val;
|
st[i] = val;
|
||||||
@ -14319,7 +14319,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|||||||
#else
|
#else
|
||||||
ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
|
ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
|
||||||
memcpy(&scvt, &s, sizeof(scvt));
|
memcpy(&scvt, &s, sizeof(scvt));
|
||||||
const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
|
const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt]);
|
||||||
#endif
|
#endif
|
||||||
sum += (ggml_float)val;
|
sum += (ggml_float)val;
|
||||||
ds0[i] = val;
|
ds0[i] = val;
|
||||||
|
Loading…
Reference in New Issue
Block a user