pandas-dev · jorisvandenbossche · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5477,6 +5477,18 @@ def predicate(arr: ArrayLike) -> bool:
         mgr = self._mgr._get_data_subset(predicate).copy(deep=False)
         return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)
 
+    def _select_dtypes_indices(self, dtype_class) -> np.ndarray:
+        """
+        Return the indices of the columns whose dtype is a ``dtype_class``.
+
+        The match is ``isinstance(arr.dtype, dtype_class)``, so only a class
+        works here (not a dtype instance or alias); that makes it mostly
+        useful for ExtensionDtypes.
+        """
+        return self._mgr._get_data_subset_indices(
+            lambda arr: isinstance(arr.dtype, dtype_class)
+        )
+
     def insert(
         self,
         loc: int,

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7127,7 +7127,7 @@ def fillna(
             if axis == 1:
                 # Check that all columns in result have the same dtype
                 # otherwise don't bother with fillna and losing accurate dtypes
-                unique_dtypes = algos.unique(self._mgr.get_dtypes())
+                unique_dtypes = self._mgr.get_unique_dtypes()
                 if len(unique_dtypes) > 1:
                     raise ValueError(
                         "All columns must have the same dtype, but got dtypes: "

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -336,6 +336,9 @@ def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool:
         blk = self.blocks[blkno]
         return any(blk is ref() for ref in mgr.blocks[blkno].refs.referenced_blocks)
 
+    def get_unique_dtypes(self) -> npt.NDArray[np.object_]:  # unique() rejects lists
+        return algos.unique(np.array([b.dtype for b in self.blocks], dtype=object))
+
     def get_dtypes(self) -> npt.NDArray[np.object_]:
         dtypes = np.array([blk.dtype for blk in self.blocks], dtype=object)
         return dtypes.take(self.blknos)
@@ -656,6 +659,11 @@ def _get_data_subset(self, predicate: Callable) -> Self:
         blocks = [blk for blk in self.blocks if predicate(blk.values)]
         return self._combine(blocks)
 
+    def _get_data_subset_indices(self, predicate: Callable) -> np.ndarray:
+        """Sorted column positions of blocks passing ``predicate`` (may be empty)."""
+        locs = [b.mgr_locs.as_array for b in self.blocks if predicate(b.values)]
+        return np.sort(np.concatenate(locs)) if locs else np.array([], dtype=np.intp)
+
     def get_bool_data(self) -> Self:
         """
         Select blocks that are bool-dtype and columns from object-dtype blocks

diff --git a/pandas/io/_util.py b/pandas/io/_util.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import datetime as dt
 from typing import (
     TYPE_CHECKING,
     Literal,
@@ -10,6 +11,7 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import lib
+from pandas._libs.tslibs import timezones
 from pandas.compat import (
     pa_version_under18p0,
     pa_version_under19p0,
@@ -35,6 +37,9 @@
     )
 
 
+pytz = import_optional_dependency("pytz", errors="ignore")  # None if pytz is missing
+
+
 def _arrow_dtype_mapping() -> dict:
     pa = import_optional_dependency("pyarrow")
     return {
@@ -120,7 +125,9 @@ def arrow_table_to_pandas(
         raise NotImplementedError
 
     df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
-    return _post_convert_dtypes(df, dtype_backend, dtype, names)
+    df = _post_convert_dtypes(df, dtype_backend, dtype, names)
+    df = _normalize_timezone_dtypes(df)
+    return df
 
 
 def _post_convert_dtypes(
@@ -189,3 +196,68 @@ def _post_convert_dtypes(
                     df[col] = df[col].astype(cat_dtype)
 
     return df
+
+
+def _normalize_pytz_timezone(tz: dt.tzinfo) -> dt.tzinfo:
+    """
+    Convert a pytz timezone to a "default" tzinfo object where possible.
+
+    pytz UTC is replaced by ``maybe_get_tz("UTC")``, a fixed offset by a
+    ``datetime.timezone`` and a named zone by ``maybe_get_tz(<name>)``. Any
+    non-pytz ``tz``, or one that cannot be converted, is returned unchanged.
+    """
+    # Detect pytz through the module name: an isinstance check against
+    # pytz.BaseTzInfo does not include fixed offsets.
+    from_pytz = type(tz).__module__.startswith("pytz")
+    if not from_pytz:
+        return tz
+    if timezones.is_utc(tz):
+        return timezones.maybe_get_tz("UTC")
+
+    if timezones.is_fixed_offset(tz):
+        try:
+            fixed = tz.utcoffset(None)
+            if fixed is not None:
+                return dt.timezone(fixed)
+        except Exception:
+            pass  # best effort: fall through to the name-based lookup
+
+    name = timezones.get_timezone(tz)
+    if isinstance(name, str):
+        try:
+            return timezones.maybe_get_tz(name)
+        except Exception:
+            pass  # some pytz timezones might not be available for zoneinfo
+    return tz
+
+
+def _normalize_timezone_index(index: pd.Index) -> pd.Index:
+    """Return ``index`` with pytz timezones normalized on every level."""
+    if isinstance(index, pd.MultiIndex):
+        levels = [_normalize_timezone_index(level) for level in index.levels]
+        # set_levels returns a new MultiIndex; keep ours if no level changed.
+        unchanged = all(new is old for new, old in zip(levels, index.levels))
+        return index if unchanged else index.set_levels(levels)
+    if isinstance(index.dtype, pd.DatetimeTZDtype):
+        tz = _normalize_pytz_timezone(index.dtype.tz)
+        return index if tz is index.dtype.tz else index.tz_convert(tz)
+    return index
+
+
+def _normalize_timezone_dtypes(df: pd.DataFrame) -> pd.DataFrame:
+    """Convert pytz timezones in ``df`` (data, index, columns) in place; return it."""
+    has_tz = pytz is not None and any(
+        isinstance(dtype, pd.DatetimeTZDtype)
+        for dtype in df._mgr.get_unique_dtypes()
+    )
+    # Positions are only looked up when at least one tz-aware column exists.
+    positions = df._select_dtypes_indices(pd.DatetimeTZDtype) if has_tz else ()
+    for pos in positions:
+        column = df.iloc[:, pos]
+        new_tz = _normalize_pytz_timezone(column.dtype.tz)
+        if new_tz is not column.dtype.tz:
+            # Written back by position, mirroring the positional read above.
+            df.isetitem(pos, column.dt.tz_convert(new_tz))
+    df.index = _normalize_timezone_index(df.index)
+    df.columns = _normalize_timezone_index(df.columns)
+    return df