# Patch summary (text before the first "diff --git" header is not part of any hunk).
#
# ggml/src/ggml-quants.c
#   In quantize_row_tq1_0_ref and quantize_row_tq2_0_ref, the four places that
#   computed `nearest_int(x * id) + 1` now compute `lroundf(x * id) + 1`.
#   NOTE(review): presumably this makes the reference rounding agree with the
#   `np_roundf` used by the Python implementation below - confirm.
#
# gguf-py/gguf/quants.py
#   Adds two quantization classes, each with quantize_blocks/dequantize_blocks:
#   * TQ1_0: scale d = max(|x|) per block (stored as float16 bytes); values are
#     rounded to -1/0/1, shifted to 0/1/2, and packed as base-3 digits:
#     5 digits per byte (weights 81, 27, 9, 3, 1) over groups of 32 and then 16,
#     and 4 digits per byte (weights 81, 27, 9, 3) for the trailing `qh` part.
#     Each packed value v is then stored as ceil(v * 256 / 243).
#     Dequantization multiplies each byte by 1, 3, 9, 27 (, 81), takes
#     `(v * 3) >> 8` to extract a digit, and subtracts 1.
#   * TQ2_0: same scale and rounding; four 2-bit values per byte, placed with
#     shifts of 0, 2, 4 and 6 bits, and extracted with `>> shift` and `& 0x03`.
#
# gguf-py/tests/test_quants.py
#   Adds "tq1_0" and "tq2_0" to the tuple of type names iterated in GGMLQuants.
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy.
# Tokens are unchanged, but leading indentation and the line splits inside the
# test tuple were reconstructed by convention - verify the context lines
# against the target tree before applying.
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 3993e27ba..4fe89de8c 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -3330,7 +3330,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
             for (size_t m = 0; m < 32; ++m) {
                 uint8_t q = 0;
                 for (size_t n = 0; n < 5; ++n) {
-                    int xi = nearest_int(x[m + n*32] * id) + 1; // -1, 0, 1 -> 0, 1, 2
+                    int xi = lroundf(x[m + n*32] * id) + 1; // -1, 0, 1 -> 0, 1, 2
                     q *= 3;
                     q += xi;
                 }
@@ -3345,7 +3345,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
             for (size_t m = 0; m < 16; ++m) {
                 uint8_t q = 0;
                 for (size_t n = 0; n < 5; ++n) {
-                    int xi = nearest_int(x[m + n*16] * id) + 1; // -1, 0, 1 -> 0, 1, 2
+                    int xi = lroundf(x[m + n*16] * id) + 1; // -1, 0, 1 -> 0, 1, 2
                     q *= 3;
                     q += xi;
                 }
@@ -3360,7 +3360,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
             uint8_t q = 0;
             for (size_t m = 0; m < 4; ++m) {
                 // -1, 0, 1 -> 0, 1, 2
-                int xi = nearest_int(x[j + m*sizeof(y->qh)] * id) + 1;
+                int xi = lroundf(x[j + m*sizeof(y->qh)] * id) + 1;
                 q *= 3;
                 q += xi;
             }
@@ -3396,7 +3396,7 @@ void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y,
                 uint8_t q = 0;
                 for (size_t n = 0; n < 4; ++n) {
                     // -1, 0, 1 -> 0, 1, 2
-                    int xi = nearest_int(x[m + n*32] * id) + 1;
+                    int xi = lroundf(x[m + n*32] * id) + 1;
                     q += (xi & 3) << (2*n);
                 }
                 y[i].qs[j + m] = q;
diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index ff589b852..3c8ba82e1 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -574,6 +574,87 @@ class Q6_K(__Quant, qtype=GGMLQuantizationType.Q6_K):
         return (d * q).reshape((n_blocks, QK_K))
 
 
+class TQ1_0(__Quant, qtype=GGMLQuantizationType.TQ1_0):
+    @classmethod
+    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        d = abs(blocks).max(axis=-1, keepdims=True)
+        with np.errstate(divide="ignore"):
+            id = np.where(d == 0, 0, 1 / d)
+        qs = np_roundf(blocks * id)
+        qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
+
+        qs0, qs1, qh = qs[..., :(32 * 5)], qs[..., (32 * 5):(48 * 5)], qs[..., (48 * 5):]
+        qs0 = qs0.reshape((n_blocks, -1, 5, 32)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs0 = np.sum(qs0, axis=-2).reshape((n_blocks, -1))
+        qs1 = qs1.reshape((n_blocks, -1, 5, 16)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs1 = np.sum(qs1, axis=-2).reshape((n_blocks, -1))
+        qh = qh.reshape((n_blocks, -1, 4, 4)) * np.array([81, 27, 9, 3], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qh = np.sum(qh, axis=-2).reshape((n_blocks, -1))
+        qs = np.concatenate([qs0, qs1, qh], axis=-1)
+        qs = (qs.astype(np.uint16) * 256 + (243 - 1)) // 243
+
+        qs = qs.astype(np.uint8)
+        d = d.astype(np.float16).view(np.uint8)
+
+        return np.concatenate([qs, d], axis=-1)
+
+    @classmethod
+    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        qs, rest = np.hsplit(blocks, [(QK_K - 4 * QK_K // 64) // 5])
+        qh, d = np.hsplit(rest, [QK_K // 64])
+
+        d = d.view(np.float16).astype(np.float32)
+
+        qs0, qs1 = qs[..., :32], qs[..., 32:]
+        qs0 = qs0.reshape((n_blocks, -1, 1, 32)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs0 = qs0.reshape((n_blocks, -1))
+        qs1 = qs1.reshape((n_blocks, -1, 1, 16)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs1 = qs1.reshape((n_blocks, -1))
+        qh = qh.reshape((n_blocks, -1, 1, 4)) * np.array([1, 3, 9, 27], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qh = qh.reshape((n_blocks, -1))
+        qs = np.concatenate([qs0, qs1, qh], axis=-1)
+        qs = ((qs.astype(np.uint16) * 3) >> 8).astype(np.int8) - np.int8(1)
+
+        return (d * qs.astype(np.float32))
+
+
+class TQ2_0(__Quant, qtype=GGMLQuantizationType.TQ2_0):
+    @classmethod
+    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        d = abs(blocks).max(axis=-1, keepdims=True)
+        with np.errstate(divide="ignore"):
+            id = np.where(d == 0, 0, 1 / d)
+        qs = np_roundf(blocks * id)
+        qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
+
+        qs = qs.reshape((n_blocks, -1, 4, 32)) << np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qs = qs[..., 0, :] | qs[..., 1, :] | qs[..., 2, :] | qs[..., 3, :]
+        qs = qs.reshape((n_blocks, -1))
+
+        d = d.astype(np.float16).view(np.uint8)
+
+        return np.concatenate([qs, d], axis=-1)
+
+    @classmethod
+    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        qs, d = np.hsplit(blocks, [QK_K // 4])
+
+        d = d.view(np.float16).astype(np.float32)
+
+        qs = qs.reshape((n_blocks, -1, 1, 32)) >> np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qs = (qs & 0x03).reshape((n_blocks, -1)).astype(np.int8) - np.int8(1)
+
+        return (d * qs.astype(np.float32))
+
+
 class IQ2_XXS(__Quant, qtype=GGMLQuantizationType.IQ2_XXS):
     ksigns: bytes = (
         b"\x00\x81\x82\x03\x84\x05\x06\x87\x88\x09\x0a\x8b\x0c\x8d\x8e\x0f"
diff --git a/gguf-py/tests/test_quants.py b/gguf-py/tests/test_quants.py
index 8b7a85c2c..762067814 100755
--- a/gguf-py/tests/test_quants.py
+++ b/gguf-py/tests/test_quants.py
@@ -66,6 +66,7 @@ class GGMLQuants:
         for t in (
             "q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
             "q2_K", "q3_K", "q4_K", "q5_K", "q6_K",
+            "tq1_0", "tq2_0",
             "iq2_xxs", "iq2_xs", "iq2_s", "iq3_xxs", "iq3_s", "iq1_s", "iq1_m",
             "iq4_nl", "iq4_xs",
         ):