gguf_dump.py: fix markddown kv array print (#8588)

* gguf_dump.py: fix markddown kv array print

* Update gguf-py/scripts/gguf_dump.py

Co-authored-by: compilade <git@compilade.net>

* gguf_dump.py: refactor kv array string handling

* gguf_dump.py: escape backticks inside of strings

* gguf_dump.py: inline code markdown escape handler added

>>> escape_markdown_inline_code("hello world")
'`hello world`'
>>> escape_markdown_inline_code("hello ` world")
'``hello ` world``'

* gguf_dump.py: handle edge case about backticks on start or end of a string

---------

Co-authored-by: compilade <git@compilade.net>
This commit is contained in:
Brian 2024-07-20 17:35:25 +10:00 committed by GitHub
parent 87e397d00b
commit c3776cacab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -4,6 +4,7 @@ from __future__ import annotations
import logging import logging
import argparse import argparse
import os import os
import re
import sys import sys
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -244,26 +245,58 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
else: else:
pretty_type = str(field.types[-1].name) pretty_type = str(field.types[-1].name)
def escape_markdown_inline_code(value_string):
# Find the longest contiguous sequence of backticks in the string then
# wrap string with appropriate number of backticks required to escape it
max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0)
inline_code_marker = '`' * (max_backticks + 1)
# If the string starts or ends with a backtick, add a space at the beginning and end
if value_string.startswith('`') or value_string.endswith('`'):
value_string = f" {value_string} "
return f"{inline_code_marker}{value_string}{inline_code_marker}"
total_elements = len(field.data) total_elements = len(field.data)
value = "" value = ""
if len(field.types) == 1: if len(field.types) == 1:
curr_type = field.types[0] curr_type = field.types[0]
if curr_type == GGUFValueType.STRING: if curr_type == GGUFValueType.STRING:
value = repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]) truncate_length = 60
value_string = str(bytes(field.parts[-1]), encoding='utf-8')
if len(value_string) > truncate_length:
head = escape_markdown_inline_code(value_string[:truncate_length // 2])
tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
value = "{head}...{tail}".format(head=head, tail=tail)
else:
value = escape_markdown_inline_code(value_string)
elif curr_type in reader.gguf_scalar_to_np: elif curr_type in reader.gguf_scalar_to_np:
value = str(field.parts[-1][0]) value = str(field.parts[-1][0])
else: else:
if field.types[0] == GGUFValueType.ARRAY: if field.types[0] == GGUFValueType.ARRAY:
curr_type = field.types[1] curr_type = field.types[1]
array_elements = []
if curr_type == GGUFValueType.STRING: if curr_type == GGUFValueType.STRING:
render_element = min(5, total_elements) render_element = min(5, total_elements)
for element_pos in range(render_element): for element_pos in range(render_element):
value += repr(str(bytes(field.parts[-1 - element_pos]), encoding='utf-8')[:5]) + (", " if total_elements > 1 else "") truncate_length = 30
value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
if len(value_string) > truncate_length:
head = escape_markdown_inline_code(value_string[:truncate_length // 2])
tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
value = "{head}...{tail}".format(head=head, tail=tail)
else:
value = escape_markdown_inline_code(value_string)
array_elements.append(value)
elif curr_type in reader.gguf_scalar_to_np: elif curr_type in reader.gguf_scalar_to_np:
render_element = min(7, total_elements) render_element = min(7, total_elements)
for element_pos in range(render_element): for element_pos in range(render_element):
value += str(field.parts[-1 - element_pos][0]) + (", " if total_elements > 1 else "") array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))
value = f'[ {value}{" ..." if total_elements > 1 else ""} ]'
value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'
kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value}) kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})
kv_dump_table_header_map = [ kv_dump_table_header_map = [
@ -382,7 +415,7 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n" markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
markdown_content += "\n\n" markdown_content += "\n\n"
print(markdown_content) # noqa: NP100 print(markdown_content) # noqa: NP100
def main() -> None: def main() -> None: