Skip to content
16 changes: 16 additions & 0 deletions gittensor/validator/utils/load_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,22 @@ def supports_tree_sitter(self, extension: Optional[str]) -> bool:
config = self.language_configs.get(ext)
return config is not None and config.language is not None

def is_line_count_extension(self, extension: Optional[str]) -> bool:
    """Report whether files with this extension are scored by line count.

    The extension is normalized by stripping leading dots and lower-casing
    before any lookup. The answer is True when the normalized extension is
    listed in NON_CODE_EXTENSIONS, or when ``self.language_configs`` has an
    entry for it whose ``language`` attribute is None.

    Args:
        extension: File extension, with or without a leading dot. May be
            None or empty.

    Returns:
        False for a missing/empty extension and for extensions that have no
        entry in ``self.language_configs`` (and are not in
        NON_CODE_EXTENSIONS); True in the two cases described above.
    """
    if extension:
        key = extension.lstrip('.').lower()
        if key in NON_CODE_EXTENSIONS:
            return True
        entry = self.language_configs.get(key)
        if entry is not None:
            # Configured, but only line-countable when no parser language is set.
            return entry.language is None
    return False


def _get_weights_dir() -> Path:
return Path(__file__).parent.parent / 'weights'
Expand Down
3 changes: 1 addition & 2 deletions gittensor/validator/utils/tree_sitter_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
INLINE_TEST_PATTERNS,
MAX_FILE_SIZE_BYTES,
MAX_LINES_SCORED_FOR_NON_CODE_EXT,
NON_CODE_EXTENSIONS,
TEST_FILE_CONTRIBUTION_WEIGHT,
TREE_SITTER_PARSE_TIMEOUT_MICROS,
)
Expand Down Expand Up @@ -277,7 +276,7 @@ def calculate_token_score_from_file_changes(
is_test_file=is_test_file,
scoring_method='skipped',
)
elif ext in NON_CODE_EXTENSIONS:
elif weights.is_line_count_extension(ext):
lines_to_score = min(file.changes, MAX_LINES_SCORED_FOR_NON_CODE_EXT)
lang_config = programming_languages.get(ext)
lang_weight = lang_config.weight if lang_config else DEFAULT_PROGRAMMING_LANGUAGE_WEIGHT
Expand Down
88 changes: 88 additions & 0 deletions tests/validator/test_token_scoring_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,5 +617,93 @@ def test_added_file_null_old_content_scores_as_new_file(self, weights):
assert file_result.nodes_scored > 0


class TestNullLanguageLineCountScoring:
    """Regression coverage for extensions configured without a parser.

    An extension listed in programming_languages.json whose ``language``
    field is absent (no tree-sitter parser) is expected to be scored with the
    ``line-count`` method rather than reported as ``skipped-unsupported``.
    Extensions that do not appear in the config at all are expected to stay
    ``skipped-unsupported``.
    """

    @pytest.fixture
    def weights(self) -> TokenConfig:
        """Provide the token configuration from ``load_token_config``."""
        return load_token_config()

    @pytest.fixture
    def prog_langs(self):
        """Provide the mapping from ``load_programming_language_weights``."""
        return load_programming_language_weights()

    def _score_file(self, filename: str, content: str, weights: TokenConfig, prog_langs):
        """Score one newly added file and return its single per-file result.

        The change/addition counts are the number of newline characters in
        ``content``, with a floor of 1 so content without a newline still
        counts as one changed line.
        """
        line_total = content.count('\n') or 1
        change = FileChange(
            pr_number=1,
            repository_full_name='test/repo',
            filename=filename,
            changes=line_total,
            additions=line_total,
            deletions=0,
            status='added',
        )
        contents = {filename: FileContentPair(old_content=None, new_content=content)}
        scored = calculate_token_score_from_file_changes([change], contents, weights, prog_langs)
        return scored.file_results[0]

    def test_graphql_scores_line_count(self, weights, prog_langs):
        """A .graphql file is line-count scored with a positive score."""
        outcome = self._score_file('schema.graphql', 'type Query { hello: String }\n', weights, prog_langs)
        assert outcome.scoring_method == 'line-count'
        assert outcome.score > 0.0

    def test_gitignore_scores_line_count(self, weights, prog_langs):
        """A .gitignore file is line-count scored with a positive score."""
        outcome = self._score_file('.gitignore', 'node_modules/\n*.pyc\n', weights, prog_langs)
        assert outcome.scoring_method == 'line-count'
        assert outcome.score > 0.0

    def test_unknown_extension_stays_skipped_unsupported(self, weights, prog_langs):
        """An extension absent from the config is skipped with a zero score."""
        outcome = self._score_file('data.xyz123', 'some content\n', weights, prog_langs)
        assert outcome.scoring_method == 'skipped-unsupported'
        assert outcome.score == 0.0

    def test_graphql_score_uses_configured_weight(self, weights, prog_langs):
        """One changed graphql line scores 1.0 (expects the configured graphql weight of 1.0)."""
        # The content has exactly one newline, so the helper reports one changed line.
        outcome = self._score_file('schema.graphql', 'type Query { hello: String }\n', weights, prog_langs)
        assert outcome.scoring_method == 'line-count'
        assert outcome.score == pytest.approx(1.0, abs=1e-6)

    def test_all_configured_null_language_extensions_are_line_count_reachable(self, weights, prog_langs):
        """Config-coverage guard over every entry whose ``language`` is None.

        Each such extension must be scored as line-count instead of
        skipped-unsupported, so a null-language weight added to the JSON
        later cannot be silently ignored by the scorer.
        """
        parserless_exts = [ext for ext, cfg in prog_langs.items() if cfg.language is None]
        assert parserless_exts, 'Expected at least one null-language extension in programming_languages.json'

        for ext in parserless_exts:
            outcome = self._score_file(f'testfile.{ext}', 'line one\nline two\n', weights, prog_langs)
            assert outcome.scoring_method == 'line-count', (
                f'Extension .{ext} is configured in programming_languages.json with language=None '
                f'but scored as {outcome.scoring_method!r} instead of line-count'
            )


if __name__ == '__main__':
    # Run this module's tests verbosely when executed directly, and hand
    # pytest's return code to the interpreter so a failing run exits non-zero
    # instead of always reporting success to the calling shell.
    raise SystemExit(pytest.main([__file__, '-v']))
Loading