vega · dsmedia · May 11, 2026 · May 9, 2026 · May 10, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -40,3 +40,6 @@ jobs:
         run: npm ci
 
       - run: npm run build
+
+      - name: Validate datapackage
+        run: uv run pytest tests/ -v --runslow --limit-rows 250000
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -195,14 +195,18 @@ uv run --group dev pytest tests/
 
 # Slow tier — frictionless schema and row validation per resource.
 # Default is full read; flights-3m.parquet (~3M rows) takes minutes.
-uv run --group dev pytest tests/ --run-slow
+uv run --group dev pytest tests/ --runslow
 
-# Slow tier with a row cap — useful for quick iteration.
-uv run --group dev pytest tests/ --run-slow --limit-rows 100000
+# Slow tier with a row cap — matches what CI runs; lower for tighter iteration.
+uv run --group dev pytest tests/ --runslow --limit-rows 250000
 ```
 
-Not run in CI. The slow tier is the comprehensive validation step; the
-fast tier alone does not exercise frictionless schemas.
+CI runs the slow tier with `--limit-rows 250000`: `flights_3m`'s ~3M
+rows are capped at that limit; every other resource is below it and validates
+in full. The fast tier is implicitly covered too — `npm run build`
+regenerates `datapackage.json` from on-disk data before the slow tier
+runs, so any byte/hash drift would surface either there or in the slow
+tier's schema validation.
 
 Resources whose schema/row failures are known and non-actionable (for
 example, `movies` whose schema is intentionally aspirational, or

diff --git a/pyproject.toml b/pyproject.toml
@@ -29,7 +29,7 @@ geo-species = [
 
 [tool.pytest.ini_options]
 markers = [
-  "slow: full schema/row validation via frictionless; opt in with --run-slow",
+  "slow: full schema/row validation via frictionless; opt in with --runslow",
 ]
 testpaths = ["tests"]
 

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,5 +1,5 @@
 """
-Pytest config: ``--run-slow`` and ``--limit-rows`` CLI options.
+Pytest config: ``--runslow`` and ``--limit-rows`` CLI options.
 
 The ``slow`` marker is registered in ``pyproject.toml``
 (``[tool.pytest.ini_options].markers``), matching the convention used in
@@ -16,7 +16,7 @@
 
 def pytest_addoption(parser: pytest.Parser) -> None:
     parser.addoption(
-        "--run-slow",
+        "--runslow",
         action="store_true",
         default=False,
         help="Run @pytest.mark.slow tests (frictionless schema/row validation).",
@@ -26,7 +26,7 @@ def pytest_addoption(parser: pytest.Parser) -> None:
         type=int,
         default=None,
         help=(
-            "Cap row reads in --run-slow tests at N rows per resource. "
+            "Cap row reads in --runslow tests at N rows per resource. "
             "Default is unlimited (full read). Use a small N for quick "
             "iteration; flights-3m takes minutes at full read."
         ),
@@ -41,10 +41,10 @@ def schema_limit_rows(request: pytest.FixtureRequest) -> int | None:
 def pytest_collection_modifyitems(
     config: pytest.Config, items: list[pytest.Item]
 ) -> None:
-    """Skip ``slow`` items unless ``--run-slow`` was passed."""
-    if config.getoption("--run-slow"):
+    """Skip ``slow`` items unless ``--runslow`` was passed."""
+    if config.getoption("--runslow"):
         return
-    skip_slow = pytest.mark.skip(reason="opt in with --run-slow")
+    skip_slow = pytest.mark.skip(reason="opt in with --runslow")
     for item in items:
         if "slow" in item.keywords:
             item.add_marker(skip_slow)
diff --git a/tests/test_datapackage.py b/tests/test_datapackage.py
@@ -9,9 +9,9 @@
   tabular JSON / arrow / parquet; hash-count supports only md5 and
   sha256, descriptor uses sha1).
 
-* Slow (``pytest --run-slow``) — frictionless schema and row validation
+* Slow (``pytest --runslow``) — frictionless schema and row validation
   per resource. Multi-minute on flights-3m at full read; opt in via the
-  ``--run-slow`` flag and pass ``--limit-rows N`` to cap row reads
+  ``--runslow`` flag and pass ``--limit-rows N`` to cap row reads
   during iteration. Default is full read.
 
 Resources whose schema/row check is known-broken upstream (``movies``
@@ -80,6 +80,7 @@ def git_blob_sha1(path: Path) -> str:
 
 @pytest.mark.parametrize("resource", _RESOURCES, ids=_RESOURCE_IDS)
 def test_file_exists(resource: dict) -> None:
+    """Catch descriptors that point at a missing or relocated data file."""
     assert "path" in resource, (
         f"descriptor regression: resource {resource.get('name')!r} has no 'path'"
     )
@@ -89,6 +90,7 @@ def test_file_exists(resource: dict) -> None:
 
 @pytest.mark.parametrize("resource", _RESOURCES, ids=_RESOURCE_IDS)
 def test_bytes_match(resource: dict) -> None:
+    """Catch on-disk edits where `bytes` in the descriptor wasn't regenerated."""
     assert "bytes" in resource, (
         f"descriptor regression: 'bytes' missing for {resource['name']!r}"
     )
@@ -102,6 +104,12 @@ def test_bytes_match(resource: dict) -> None:
 
 @pytest.mark.parametrize("resource", _RESOURCES, ids=_RESOURCE_IDS)
 def test_sha1_matches_git_blob(resource: dict) -> None:
+    """
+    Catch on-disk edits where `hash` in the descriptor wasn't regenerated.
+
+    Uses git's blob SHA-1 so the recorded hash matches `git ls-tree`; unlike
+    the `bytes` check, this catches content edits that keep the file size.
+    """
     declared = resource.get("hash", "")
     assert declared, f"descriptor regression: 'hash' missing for {resource['name']!r}"
     assert declared.startswith("sha1:"), (