diff --git a/python/dolma/taggers/punctuation.py b/python/dolma/taggers/punctuation.py index 3eafae36..0f8049a9 100644 --- a/python/dolma/taggers/punctuation.py +++ b/python/dolma/taggers/punctuation.py @@ -18,7 +18,7 @@ def __init__(self) -> None: "\U0001f300-\U0001f64f" "\U0001f680-\U0001f6ff" "\u2600-\u26ff\u2700-\u27bf" - r"]+" + r"]" r")+$", regex.UNICODE, )