From f9fc08ab763aedb4ef541d6fbcccde186ba11314 Mon Sep 17 00:00:00 2001
From: Chessing234
Date: Thu, 16 Apr 2026 05:07:03 +0530
Subject: [PATCH] Fix BaseBucketApi.add incrementing _total per call instead of
 per value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BaseBucketApi.total's docstring states it returns 'the total number of
values added to the tracker', but BaseBucketApi.add() increments _total
by 1 per (value, count) iteration regardless of count. So
tracker.add(values=[v0, v1], counts=[5, 3]) sets total=2 instead of 8.

This matters when reconstructing a tracker from a SummaryTuple: each bin
in the summary represents 'count' original observations, and the total
should reflect that multiplicity (this is exactly why add_summary() saves
prev_count and restores total/sum afterwards — otherwise add() leaves
them off).

Increment _total by count to match _sum's per-value accumulation and the
public docstring.
---
 python/dolma/core/binning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/dolma/core/binning.py b/python/dolma/core/binning.py
index 3afee92f..9722ffd9 100644
--- a/python/dolma/core/binning.py
+++ b/python/dolma/core/binning.py
@@ -232,7 +232,7 @@ def add(self, values: Union[List[Union[int, float]], Union[int, float]], counts:
 
         for value, count in zip(values, counts):
             self._add(value, count)
-            self._total += 1
+            self._total += count
             self._sum += value * count
 
     def add_summary(self, summary: SummaryTuple):