laminlabs · falexwolf · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/scripts/run_loading_benchmark_on_collection.py b/scripts/run_loading_benchmark_on_collection.py
@@ -146,20 +146,9 @@ def _largest_divisor_at_most(value: int, upper_bound: int) -> int:
     return 1
 
 
-def _log_benchmark_result(
-    *,
-    tool: str,
-    collection_key: str,
-    n_datasets: int,
-    n_samples_per_sec: float,
-    n_samples: int,
-    n_samples_collection: int,
-    num_workers: int,
-    batch_size: int,
-    chunk_size: int,
-) -> None:
+def _create_benchmarking_sheet() -> ln.Record:
     benchmark_feature_type = ln.Feature(name="Benchmarks", is_type=True).save()
-    benchmark_features = {
+    feature_dtypes = {
         "tool": str,
         "collection": ln.Collection,
         "n_datasets": int,
@@ -178,30 +167,20 @@ def _log_benchmark_result(
         feature_name: ln.Feature(
             name=feature_name, dtype=dtype, type=benchmark_feature_type
         ).save()
-        for feature_name, dtype in benchmark_features.items()
-    }
-
-    new_result = {
-        features["tool"]: tool,
-        features["collection"]: collection_key,
-        features["n_datasets"]: n_datasets,
-        features["n_samples_per_sec"]: n_samples_per_sec,
-        features["n_samples_loaded"]: n_samples,
-        features["n_samples_collection"]: n_samples_collection,
-        features["num_workers"]: num_workers,
-        features["batch_size"]: batch_size,
-        features["chunk_size"]: chunk_size,
-        features["compute_spec"]: compute_spec.get_aws_sagemaker_instance_type(),
-        features["run"]: ln.context.run,
-        features["timestamp"]: datetime.datetime.now(),
-        features["user"]: ln.setup.settings.user.handle,
+        for feature_name, dtype in feature_dtypes.items()
     }
+    schema = ln.Schema(
+        list(features.values()), name="loading_benchmark_result_schema"
+    ).save()
 
     benchmarks = ln.Record(name="Benchmarks", is_type=True).save()
-    task = ln.Record(
-        name="run_loading_benchmark_on_collection.py", type=benchmarks, is_type=True
+    sheet = ln.Record(
+        name="run_loading_benchmark_on_collection.py",
+        type=benchmarks,
+        is_type=True,
+        schema=schema,
     ).save()
-    ln.Record(type=task, features=new_result).save()
+    return sheet
 
 
 @click.command()
@@ -297,18 +276,30 @@ def run(
         )
 
     n_samples_per_sec, _, _ = benchmark_loader(loader, n_samples, batch_size)
-
-    _log_benchmark_result(
-        tool=tool,
-        collection_key=collection_key,
-        n_datasets=n_datasets,
-        n_samples_per_sec=n_samples_per_sec,
-        n_samples=n_samples,
-        n_samples_collection=n_samples_collection,
-        num_workers=num_workers,
-        batch_size=batch_size,
-        chunk_size=chunk_size,
-    )
+    sheet = ln.Record.filter(
+        name="run_loading_benchmark_on_collection.py"
+    ).one_or_none()
+    if sheet is None:
+        sheet = _create_benchmarking_sheet()
+
+    ln.Record(
+        type=sheet,
+        features={
+            "tool": tool,
+            "collection": collection_key,
+            "n_datasets": n_datasets,
+            "n_samples_per_sec": n_samples_per_sec,
+            "n_samples_loaded": n_samples,
+            "n_samples_collection": n_samples_collection,
+            "num_workers": num_workers,
+            "batch_size": batch_size,
+            "chunk_size": chunk_size,
+            "compute_spec": compute_spec.get_aws_sagemaker_instance_type(),
+            "run": ln.context.run,
+            "timestamp": datetime.datetime.now(),
+            "user": ln.setup.settings.user.handle,
+        },
+    ).save()
 
 
 if __name__ == "__main__":