gguf : add python reader example (#5216)

* Update CMakeLists.txt
* Create reader.py
* Update reader.py
* Update reader.py another whitespace :|
* Update reader.py
* lintlintlint
2024-12-24 02:14:35 +00:00 · 2024-02-13 18:56:38 +01:00 · 2024-02-13 18:56:38 +01:00 · 6c00a06692
commit 6c00a06692
parent ea9c8e1143
2 changed files with 46 additions and 0 deletions
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -38,6 +38,7 @@ else()
    add_subdirectory(speculative)
    add_subdirectory(lookahead)
    add_subdirectory(lookup)
+    add_subdirectory(gguf)
    add_subdirectory(train-text-from-scratch)
    add_subdirectory(imatrix)
    if (LLAMA_BUILD_SERVER)
--- a/gguf-py/examples/reader.py
+++ b/gguf-py/examples/reader.py
@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+import sys
+from pathlib import Path
+
+# Put the directory two levels above this script on the import path (gguf-py/
+# for gguf-py/examples/reader.py) so that a local `gguf` package located there
+# can be imported when the example is run straight from a checkout.
+# This has to run BEFORE the gguf import below; done afterwards it has no
+# effect on that import and the script fails with ImportError unless gguf is
+# already installed.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from gguf.gguf_reader import GGUFReader  # noqa: E402
+
+
+def read_gguf_file(gguf_file_path) -> None:
+    """
+    Print the key-value fields and the tensor table of a GGUF file to stdout.
+
+    Parameters:
+    - gguf_file_path: Path to the GGUF file; passed unchanged to GGUFReader.
+
+    Returns nothing. Errors raised by GGUFReader while opening or parsing the
+    file are not caught here.
+    """
+
+    reader = GGUFReader(gguf_file_path)
+
+    # List all key-value pairs in a columnized format
+    print("Key-Value Pairs:")
+    # default=0 keeps max() from raising ValueError when the file has no fields.
+    max_key_length = max((len(key) for key in reader.fields), default=0)
+    for key, field in reader.fields.items():
+        # field.data is used as a list of indices into field.parts; only the
+        # first indexed part is shown. A field without any data index
+        # (presumably an empty array value -- TODO confirm) is printed with an
+        # empty value instead of raising IndexError.
+        value = field.parts[field.data[0]] if len(field.data) > 0 else ""
+        print(f"{key:{max_key_length}} : {value}")
+    print("----")
+
+    # List all tensors, one row per tensor, under a fixed-width header
+    print("Tensors:")
+    tensor_info_format = "{:<30} | Shape: {:<15} | Size: {:<12} | Quantization: {}"
+    print(tensor_info_format.format("Tensor Name", "Shape", "Size", "Quantization"))
+    print("-" * 80)
+    for tensor in reader.tensors:
+        # Dimensions joined as e.g. "4096x32000"; "Size" is the element count.
+        shape_str = "x".join(map(str, tensor.shape))
+        size_str = str(tensor.n_elements)
+        quantization_str = tensor.tensor_type.name
+        print(tensor_info_format.format(tensor.name, shape_str, size_str, quantization_str))
+
+
+if __name__ == '__main__':
+    # The first command-line argument is the GGUF file to inspect; any
+    # further arguments are ignored. Without it, print usage and exit 1.
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("Usage: reader.py <path_to_gguf_file>")
+        sys.exit(1)
+    read_gguf_file(cli_args[0])