Update test-tokenizer-random.py

Re-added type declarations
This commit is contained in:
Robert 2024-11-17 08:35:07 -08:00 committed by GitHub
parent bc8648fbbe
commit 883dc22d44
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -423,7 +423,7 @@ def compare_tokenizers(tokenizer1: TokenizerGroundtruth, tokenizer2: TokenizerLl
# return -1
# return min(len(ids1), len(ids2))
# Rewritten to use zip() and next() instead of for loop
def find_first_mismatch(ids1, ids2) -> int:
def find_first_mismatch(ids1: Sequence[Any], ids2: Sequence[Any]) -> int:
index = next((i for i, (a, b) in enumerate(zip(ids1, ids2)) if a != b), -1)
if index < 0 and len(ids1) != len(ids2):
index = min(len(ids1), len(ids2))