pandas-dev · jbrockmendel · Apr 11, 2026 · Apr 11, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst
@@ -296,6 +296,7 @@ I/O
 - Fixed :func:`read_json` with ``lines=True`` and ``chunksize`` to respect ``nrows``
   when the requested row count is not a multiple of the chunk size (:issue:`64025`)
 - Bug in :meth:`DataFrame.__repr__` where horizontally truncated output could exceed the terminal width by up to 4 characters (:issue:`32461`)
+- Bug in :meth:`DataFrame.to_csv` where datetime and timedelta columns or indexes could be formatted inconsistently from one chunk to the next, depending on ``chunksize`` (:issue:`55481`)
 - Bug in :meth:`DataFrame.to_stata` raising ``KeyError`` when column names require renaming and ``convert_dates`` is specified for a different column (:issue:`60536`)
 - Fixed :func:`read_json` with ``lines=True`` and ``nrows=0`` to return an empty DataFrame (:issue:`64025`)
 - Fixed bug in :meth:`HDFStore.select` where passing ``where`` as a list of conditions referencing caller-scope variables failed on Python 3.12+ due to :pep:`709` inlining list comprehension stack frames (:issue:`64881`)

diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -20,7 +20,10 @@
 
 import numpy as np
 
-from pandas._libs import writers as libwriters
+from pandas._libs import (
+    tslib,
+    writers as libwriters,
+)
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.generic import (
@@ -31,6 +34,10 @@
 )
 from pandas.core.dtypes.missing import notna
 
+from pandas.core.arrays import (
+    DatetimeArray,
+    TimedeltaArray,
+)
 from pandas.core.indexes.api import Index
 
 from pandas.io.common import get_handle
@@ -47,6 +54,8 @@
         npt,
     )
 
+    from pandas import DataFrame
+
     from pandas.io.formats.format import DataFrameFormatter
 
 
@@ -307,30 +316,168 @@ def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]:
     def _save_body(self) -> None:
         nrows = len(self.data_index)
         chunks = (nrows // self.chunksize) + 1
+
+        # GH#55481: decide each datetime/timedelta column's dates-only
+        # formatting from the full column rather than per chunk. This is
+        # None when date_format is set or no such column exists.
+        col_formats = self._compute_col_date_formats()
+
+        # GH#55481: format a DatetimeIndex/TimedeltaIndex once from the full
+        # index (None otherwise); each chunk then slices the result.
+        formatted_index = self._preformat_index()
+
         for i in range(chunks):
             start_i = i * self.chunksize
             end_i = min(start_i + self.chunksize, nrows)
             if start_i >= end_i:
                 break
-            self._save_chunk(start_i, end_i)
+            self._save_chunk(start_i, end_i, col_formats, formatted_index)
+
+    def _preformat_index(self) -> npt.NDArray[np.object_] | None:
+        """Format a datetime-like index once, from the full index.
+
+        DatetimeIndex/TimedeltaIndex formatting depends on _is_dates_only,
+        which must come from the whole index, not per-chunk slices.
+
+        Returns the formatted values for the whole index, or None when
+        nlevels is 0, date_format is set, or the index values are not a
+        DatetimeArray/TimedeltaArray.
+        """
+        if self.nlevels == 0 or self.date_format is not None:
+            return None
+
+        idx_values = self.data_index._values
+        if isinstance(idx_values, (DatetimeArray, TimedeltaArray)):
+            return self.data_index._get_values_for_csv(**self._number_format)
 
-    def _save_chunk(self, start_i: int, end_i: int) -> None:
+        return None
+
+    def _compute_col_date_formats(self) -> dict[int, bool] | None:
+        """Record, for each datetime/timedelta column, the _is_dates_only
+        value of the full column, so that every chunk is formatted alike.
+
+        Returns a dict mapping column position to that value, or None when
+        date_format is set or the frame has no such column.
+        """
+        if self.date_format is not None:
+            # An explicit date_format makes auto-detection unnecessary.
+            return None
+
+        arrays = (self.obj.iloc[:, pos].array for pos in range(self.obj.shape[1]))
+        dates_only_by_pos = {
+            pos: arr._is_dates_only
+            for pos, arr in enumerate(arrays)
+            if isinstance(arr, (DatetimeArray, TimedeltaArray))
+        }
+        return dates_only_by_pos or None
+
+    def _save_chunk(
+        self,
+        start_i: int,
+        end_i: int,
+        col_formats: dict[int, bool] | None,  # from _compute_col_date_formats
+        formatted_index: npt.NDArray[np.object_] | None,  # from _preformat_index
+    ) -> None:
         # create the data for a chunk
         slicer = slice(start_i, end_i)
         df = self.obj.iloc[slicer]
 
-        res = df._get_values_for_csv(**self._number_format)
-        data = list(res._iter_column_arrays())
+        if col_formats is None:  # nothing pre-computed: format the chunk as a whole
+            res = df._get_values_for_csv(**self._number_format)
+            data = list(res._iter_column_arrays())
+        else:
+            data = list(self._format_chunk_columns(df, col_formats))
 
         ix = (
-            self.data_index[slicer]._get_values_for_csv(**self._number_format)
-            if self.nlevels != 0
-            else np.empty(end_i - start_i)
+            formatted_index[start_i:end_i]  # rows of the index formatted up front
+            if formatted_index is not None
+            else (
+                self.data_index[slicer]._get_values_for_csv(**self._number_format)
+                if self.nlevels != 0
+                else np.empty(end_i - start_i)
+            )
         )
+
         libwriters.write_csv_rows(
             data,
             ix,
             self.nlevels,
             self.cols,
             self.writer,
         )
+
+    def _format_chunk_columns(
+        self, df: DataFrame, col_formats: dict[int, bool]
+    ) -> list:
+        """Format the columns of one chunk, one column at a time.
+
+        ``df`` holds the rows of the chunk and ``col_formats`` maps a column
+        position to the _is_dates_only value of the *full* column. Columns
+        listed there are formatted with that value so every chunk renders
+        them alike (GH#55481); all other columns go through
+        get_values_for_csv with the settings in ``self._number_format``,
+        the same settings the other formatting calls in this class use.
+
+        Returns a list with one formatted result per column, in column
+        order.
+        """
+        from pandas.core.indexes.base import get_values_for_csv
+
+        data: list = []
+        for col_idx in range(df.shape[1]):
+            # Build the column once; both branches need it.
+            col = df.iloc[:, col_idx]
+            if col_idx in col_formats:
+                # Pin the format decided from the full column.
+                formatted = self._format_dt_column(col.array, col_formats[col_idx])
+            else:
+                # Same keyword settings as the whole-chunk formatting path.
+                formatted = get_values_for_csv(col._values, **self._number_format)
+            data.append(formatted)
+        return data
+
+    def _format_dt_column(
+        self,
+        arr: DatetimeArray | TimedeltaArray,
+        is_dates_only: bool,
+    ) -> npt.NDArray[np.object_]:
+        """Format one chunk of a datetime or timedelta column, using the
+        _is_dates_only value pre-computed from the full column."""
+        if isinstance(arr, DatetimeArray):
+            date_format = "%Y-%m-%d" if is_dates_only else None
+
+            # Call format_array_from_datetime directly to bypass the
+            # per-chunk _is_dates_only check in _format_native_types.
+            # NOTE(review): with format=None the sub-second precision may
+            # still be chosen from the chunk's values -- confirm in tslib.
+            result = tslib.format_array_from_datetime(
+                arr.asi8,
+                tz=arr.tz,
+                format=date_format,
+                na_rep=self.na_rep,
+                reso=arr._creso,
+            )
+        else:
+            # TimedeltaArray
+            from pandas.io.formats.format import get_format_timedelta64
+
+            if is_dates_only:
+                # Default (even-days) format. na_rep is passed positionally,
+                # so the call does not depend on the parameter's name.
+                formatter = get_format_timedelta64(arr, self.na_rep)
+            else:
+                # Force long format to prevent per-chunk auto-detection
+                from pandas import Timedelta
+
+                def formatter(
+                    x: object, _na_rep: str | float = self.na_rep
+                ) -> str | float:
+                    if x is None or x != x:  # NaT compares unequal to itself
+                        return _na_rep
+                    if not isinstance(x, Timedelta):
+                        x = Timedelta(x)
+                    return x._repr_base(format="long")  # type: ignore[attr-defined]
+
+            result = np.frompyfunc(formatter, 1, 1)(arr._ndarray)
+
+        return np.asarray(result, dtype=object)
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -860,6 +860,98 @@ def test_callable_float_format_compatibility():
     assert result == expected
 
 
+def test_to_csv_chunksize_datetime_column():
+    # GH#55481 - a time component in a later chunk must not leave the
+    # earlier chunks in the compact dates-only format
+    df = DataFrame({"A": pd.date_range("2016-01-01", periods=3, freq="D")})
+    df.iloc[-1, -1] += pd.Timedelta(minutes=1)
+
+    rows = [
+        ",A",
+        "0,2016-01-01 00:00:00",
+        "1,2016-01-02 00:00:00",
+        "2,2016-01-03 00:01:00",
+    ]
+    expected = tm.convert_rows_list_to_csv_str(rows)
+    result = df.to_csv(chunksize=1)
+    assert result == expected
+
+
+def test_to_csv_chunksize_datetime_column_dates_only():
+    # GH#55481 - a column with no time component anywhere keeps the
+    # compact dates-only rendering in every chunk
+    df = DataFrame({"A": pd.date_range("2016-01-01", periods=3, freq="D")})
+
+    rows = [",A", "0,2016-01-01", "1,2016-01-02", "2,2016-01-03"]
+    expected = tm.convert_rows_list_to_csv_str(rows)
+    result = df.to_csv(chunksize=1)
+    assert result == expected
+
+
+def test_to_csv_chunksize_timedelta_column():
+    # GH#55481 - whole-day chunks must use the same long format that the
+    # non-whole-day last row requires
+    df = DataFrame({"A": pd.timedelta_range("1D", periods=3, freq="D")})
+    df.iloc[-1, -1] += pd.Timedelta(minutes=1)
+
+    rows = [
+        ",A",
+        "0,1 days 00:00:00",
+        "1,2 days 00:00:00",
+        "2,3 days 00:01:00",
+    ]
+    expected = tm.convert_rows_list_to_csv_str(rows)
+    result = df.to_csv(chunksize=1)
+    assert result == expected
+
+
+def test_to_csv_chunksize_datetime_index():
+    # GH#55481 - DatetimeIndex should also be consistent across chunks
+    dti = pd.date_range("2016-01-01", periods=3, freq="D")
+    index = dti[:2].append(dti[-1:] + pd.Timedelta(minutes=1))
+    df = DataFrame({"A": [1, 2, 3]}, index=index)
+
+    rows = [
+        ",A",
+        "2016-01-01 00:00:00,1",
+        "2016-01-02 00:00:00,2",
+        "2016-01-03 00:01:00,3",
+    ]
+    expected = tm.convert_rows_list_to_csv_str(rows)
+    result = df.to_csv(chunksize=1)
+    assert result == expected
+
+
+def test_to_csv_chunksize_multiple_datetime_columns():
+    # GH#55481 - each column should be formatted based on its own data
+    col_a = pd.date_range("2016-01-01", periods=3, freq="D")
+    col_b = pd.date_range("2020-06-01", periods=3, freq="D")
+    df = DataFrame({"A": col_a, "B": col_b})
+    # Only A gets a time component; B stays dates-only.
+    df.iloc[-1, 0] += pd.Timedelta(minutes=1)
+
+    rows = [
+        ",A,B",
+        "0,2016-01-01 00:00:00,2020-06-01",
+        "1,2016-01-02 00:00:00,2020-06-02",
+        "2,2016-01-03 00:01:00,2020-06-03",
+    ]
+    expected = tm.convert_rows_list_to_csv_str(rows)
+    result = df.to_csv(chunksize=1)
+    assert result == expected
+
+
+def test_to_csv_chunksize_matches_no_chunksize():
+    # GH#55481 - output should be the same regardless of chunksize
+    df = DataFrame({"A": pd.date_range("2016-01-01", periods=3, freq="D")})
+    df.iloc[-1, -1] += pd.Timedelta(minutes=1)
+
+    # to_csv() without an explicit chunksize serves as the reference output
+    expected = df.to_csv()
+    result = df.to_csv(chunksize=1)
+    assert result == expected
+
+
 def test_no_float_format():
     df = DataFrame({"A": [1.23, 4.56]})
     result = df.to_csv(float_format=None, lineterminator="\n")