mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 03:14:35 +00:00
ggml : remove unused ggml_mul special case
It would otherwise conflict with the more general optimization coming with Mamba-2.

* ggml : handle TQ1_0 and TQ2_0 in dequantization-based operators
This commit is contained in:
parent
7f3a619c98
commit
8d61607656
@ -9921,6 +9921,8 @@ static void ggml_compute_forward_add(
|
|||||||
case GGML_TYPE_Q4_K:
|
case GGML_TYPE_Q4_K:
|
||||||
case GGML_TYPE_Q5_K:
|
case GGML_TYPE_Q5_K:
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
|
case GGML_TYPE_TQ1_0:
|
||||||
|
case GGML_TYPE_TQ2_0:
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
case GGML_TYPE_IQ2_XS:
|
case GGML_TYPE_IQ2_XS:
|
||||||
case GGML_TYPE_IQ3_XXS:
|
case GGML_TYPE_IQ3_XXS:
|
||||||
@ -10299,6 +10301,8 @@ static void ggml_compute_forward_add1(
|
|||||||
case GGML_TYPE_Q4_K:
|
case GGML_TYPE_Q4_K:
|
||||||
case GGML_TYPE_Q5_K:
|
case GGML_TYPE_Q5_K:
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
|
case GGML_TYPE_TQ1_0:
|
||||||
|
case GGML_TYPE_TQ2_0:
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
case GGML_TYPE_IQ2_XS:
|
case GGML_TYPE_IQ2_XS:
|
||||||
case GGML_TYPE_IQ3_XXS:
|
case GGML_TYPE_IQ3_XXS:
|
||||||
@ -10427,6 +10431,8 @@ static void ggml_compute_forward_acc(
|
|||||||
case GGML_TYPE_Q4_K:
|
case GGML_TYPE_Q4_K:
|
||||||
case GGML_TYPE_Q5_K:
|
case GGML_TYPE_Q5_K:
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
|
case GGML_TYPE_TQ1_0:
|
||||||
|
case GGML_TYPE_TQ2_0:
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
case GGML_TYPE_IQ2_XS:
|
case GGML_TYPE_IQ2_XS:
|
||||||
case GGML_TYPE_IQ3_XXS:
|
case GGML_TYPE_IQ3_XXS:
|
||||||
@ -10562,16 +10568,7 @@ static void ggml_compute_forward_mul_f32(
|
|||||||
GGML_ASSERT( nb0 == sizeof(float));
|
GGML_ASSERT( nb0 == sizeof(float));
|
||||||
GGML_ASSERT(nb00 == sizeof(float));
|
GGML_ASSERT(nb00 == sizeof(float));
|
||||||
|
|
||||||
if (ggml_nelements(src1) == 1) {
|
if (nb10 == sizeof(float)) {
|
||||||
float scale = ((float *) src1->data)[0];
|
|
||||||
for (int64_t ir = ith; ir < nr; ir += nth) {
|
|
||||||
if (dst->data != src0->data) {
|
|
||||||
// src0 is same shape as dst => same indices
|
|
||||||
memcpy((char *)dst->data + ir*nb1, (char *)src0->data + ir*nb01, ne0 * sizeof(float));
|
|
||||||
}
|
|
||||||
ggml_vec_scale_f32(ne0, (float *) ((char *) dst->data + ir*nb1), scale);
|
|
||||||
}
|
|
||||||
} else if (nb10 == sizeof(float)) {
|
|
||||||
for (int64_t ir = ith; ir < nr; ir += nth) {
|
for (int64_t ir = ith; ir < nr; ir += nth) {
|
||||||
// src0 and dst are same shape => same indices
|
// src0 and dst are same shape => same indices
|
||||||
const int64_t i03 = ir/(ne02*ne01);
|
const int64_t i03 = ir/(ne02*ne01);
|
||||||
@ -13419,6 +13416,8 @@ static void ggml_compute_forward_out_prod(
|
|||||||
case GGML_TYPE_Q4_K:
|
case GGML_TYPE_Q4_K:
|
||||||
case GGML_TYPE_Q5_K:
|
case GGML_TYPE_Q5_K:
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
|
case GGML_TYPE_TQ1_0:
|
||||||
|
case GGML_TYPE_TQ2_0:
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
case GGML_TYPE_IQ2_XS:
|
case GGML_TYPE_IQ2_XS:
|
||||||
case GGML_TYPE_IQ3_XXS:
|
case GGML_TYPE_IQ3_XXS:
|
||||||
@ -13607,6 +13606,8 @@ static void ggml_compute_forward_set(
|
|||||||
case GGML_TYPE_Q4_K:
|
case GGML_TYPE_Q4_K:
|
||||||
case GGML_TYPE_Q5_K:
|
case GGML_TYPE_Q5_K:
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
|
case GGML_TYPE_TQ1_0:
|
||||||
|
case GGML_TYPE_TQ2_0:
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
case GGML_TYPE_IQ2_XS:
|
case GGML_TYPE_IQ2_XS:
|
||||||
case GGML_TYPE_IQ3_XXS:
|
case GGML_TYPE_IQ3_XXS:
|
||||||
|
Loading…
Reference in New Issue
Block a user