diff --git a/python/dolma/taggers/code/code_taggers.py b/python/dolma/taggers/code/code_taggers.py
index 31b57087..43779a9b 100644
--- a/python/dolma/taggers/code/code_taggers.py
+++ b/python/dolma/taggers/code/code_taggers.py
@@ -138,7 +138,7 @@ def _score(self, text: str, copyright_spans: List[Span]) -> float:
             else:
                 span = copyright_spans[0]
                 # percentage of content affected
-                score = (span.end - span.start + 1) * 1.0 / len(text)
+                score = (span.end - span.start) * 1.0 / len(text)
         except ZeroDivisionError:
             score = -1.0
         return score