From 49afd8aa5be7975d6d3c539317be169dbf504b56 Mon Sep 17 00:00:00 2001 From: ivanharvard <144486839+ivanharvard@users.noreply.github.com> Date: Wed, 30 Jul 2025 15:34:38 -0400 Subject: [PATCH 1/2] added WordLexer support for txt files --- compare50/_data.py | 10 ++++++++-- compare50/lexers.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 compare50/lexers.py diff --git a/compare50/_data.py b/compare50/_data.py index c9b17d7..add6ec5 100644 --- a/compare50/_data.py +++ b/compare50/_data.py @@ -8,6 +8,8 @@ import pygments import pygments.lexers +from .lexers import WordLexer + __all__ = ["Pass", "Comparator", "File", "Submission", "Pass", "Span", "Score", "Comparison", "Token"] @@ -191,14 +193,18 @@ def lexer(self): # get lexer for this file type try: - lexer = pygments.lexers.get_lexer_for_filename(self.name.name) + if ext == ".txt": + lexer = WordLexer() + else: + lexer = pygments.lexers.get_lexer_for_filename(self.name.name) + self._lexer_cache[ext] = lexer return lexer except pygments.util.ClassNotFound: try: return pygments.lexers.guess_lexer(self.read()) except pygments.util.ClassNotFound: - return pygments.lexers.special.TextLexer() + return WordLexer() @classmethod def get(cls, id): diff --git a/compare50/lexers.py b/compare50/lexers.py new file mode 100644 index 0000000..fadf605 --- /dev/null +++ b/compare50/lexers.py @@ -0,0 +1,16 @@ +from pygments.lexer import RegexLexer +from pygments.token import Text, Name + +class WordLexer(RegexLexer): + """Custom compare50 lexer that creates a token based on each 'word'.""" + name = "WordLexer" + aliases = ["word"] + filenames = ["*.txt"] + + tokens = { + "root": [ + (r"\s+", Text), # whitespace + (r"\w+", Name), # word (alphanumeric) + (r"\W", Text), # punctuation or other + ] + } From c32279a3bc246dd240e865905ea5ca95ca7749cc Mon Sep 17 00:00:00 2001 From: Rongxin Liu Date: Wed, 30 Jul 2025 17:34:19 -0400 Subject: [PATCH 2/2] remove unused import of os --- compare50/_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/compare50/_data.py b/compare50/_data.py index add6ec5..2d0dbd9 100644 --- a/compare50/_data.py +++ b/compare50/_data.py @@ -1,13 +1,11 @@ import abc from collections.abc import Mapping, Sequence -import os import pathlib import numbers import attr import pygments import pygments.lexers - from .lexers import WordLexer