Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-26 03:14:35 +00:00)
gguf-dump : support i-quants (#5841)
Co-authored-by: Black_Fox <radekliska@gmail.com>
This commit is contained in:
parent de9692a7d2
commit 87c2e8b279
@@ -618,6 +618,14 @@ class GGMLQuantizationType(IntEnum):
|
|||||||
Q5_K = 13
|
Q5_K = 13
|
||||||
Q6_K = 14
|
Q6_K = 14
|
||||||
Q8_K = 15
|
Q8_K = 15
|
||||||
|
IQ2_XXS = 16
|
||||||
|
IQ2_XS = 17
|
||||||
|
IQ3_XXS = 18
|
||||||
|
IQ1_S = 19
|
||||||
|
IQ4_NL = 20
|
||||||
|
IQ3_S = 21
|
||||||
|
IQ2_S = 22
|
||||||
|
IQ4_XS = 23
|
||||||
|
|
||||||
|
|
||||||
class GGUFEndian(IntEnum):
|
class GGUFEndian(IntEnum):
|
||||||
@@ -676,6 +684,14 @@ GGML_QUANT_SIZES = {
|
|||||||
GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
|
GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
|
||||||
GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
|
GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
|
||||||
GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
|
GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
|
||||||
|
GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
|
||||||
|
GGMLQuantizationType.IQ2_XS: (256, 2 + QK_K // 4 + QK_K // 32),
|
||||||
|
GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
|
||||||
|
GGMLQuantizationType.IQ1_S: (256, 2 + QK_K // 8 + QK_K // 16),
|
||||||
|
GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
|
||||||
|
GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
|
||||||
|
GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
|
||||||
|
GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user