From 31a1b0eeaa2c690f63772844fdac1ac24ed024c8 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Mon, 8 Jul 2024 16:34:39 -0400
Subject: [PATCH] llama : fix Viking pre-tokenizer regex

The order was previously wrong, which caused errors in some tests.
---
 src/llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index c30d0adfe..b652762d2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -15440,8 +15440,8 @@ struct llm_tokenizer_bpe {
                 break;
             case LLAMA_VOCAB_PRE_TYPE_VIKING:
                 regex_exprs = {
-                    "\\p{N}",
                     " ?[^(\\s|.,!?…。,、।۔،)]+",
+                    "\\p{N}",
                 };
                 break;
             default: