Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ Datetimelike
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` raising ``AssertionError`` instead of :class:`OutOfBoundsDatetime` when replacing with a ``datetime`` value outside the ``datetime64[ns]`` range (:issue:`61671`)
- Bug in :meth:`DatetimeArray.isin` and :meth:`TimedeltaArray.isin` where mismatched resolutions could silently truncate finer-resolution values, leading to false matches (:issue:`64545`)
- Bug in adding non-nano :class:`DatetimeIndex` with non-vectorized offsets (e.g. :class:`CustomBusinessDay`, :class:`CustomBusinessMonthEnd`) having a sub-unit ``offset`` parameter incorrectly truncating the result or raising ``AttributeError`` (:issue:`56586`)
- Bug in casting from a timezone-naive to a timezone-aware datetime dtype, where :meth:`DatetimeArray.astype` raised ``TypeError`` and PyArrow-backed arrays incorrectly treated naive timestamps as UTC; both now interpret the values as wall times, matching ``tz_localize`` semantics (:issue:`49281`)
- Bug in subtracting :class:`BusinessHour` (or :class:`CustomBusinessHour`) from a :class:`Timestamp` giving incorrect results when the subtraction would land exactly on the business-hour opening time (:issue:`33682`)

Timedelta
Expand Down
19 changes: 18 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,18 @@ def _box_pa_array(
value = extract_array(value, extract_numpy=True)
if isinstance(value, cls):
pa_array = value._pa_array
if (
pa_type is not None
and pa.types.is_timestamp(pa_type)
and pa_type.tz is not None
and pa.types.is_timestamp(pa_array.type)
and pa_array.type.tz is None
):
# GH#49281 tz-naive to tz-aware: treat as wall times.
# pyarrow cast assumes UTC; use assume_timezone instead.
pa_array = pc.assume_timezone(
pa_array, str(pa_type.tz), ambiguous="raise", nonexistent="raise"
)
elif isinstance(value, (pa.Array, pa.ChunkedArray)):
pa_array = value
elif isinstance(value, BaseMaskedArray):
Expand Down Expand Up @@ -642,7 +654,12 @@ def _box_pa_array(

pass_dtype = tz_to_dtype(tz=pa_type.tz, unit=pa_type.unit)
value = extract_array(value, extract_numpy=True)
if isinstance(value, DatetimeArray):
if isinstance(value, DatetimeArray) and (
value.tz is not None or pa_type.tz is None
):
# GH#49281 only skip _from_sequence when the value
# already has a tz or target is tz-naive. tz-naive to
# tz-aware must go through _from_sequence to tz_localize.
dta = value
else:
dta = DatetimeArray._from_sequence(
Expand Down
11 changes: 3 additions & 8 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,14 +762,9 @@ def astype(self, dtype, copy: bool = True):
# e.g. Sparse[datetime64[ns]]
return super().astype(dtype, copy=copy)
elif self.tz is None:
# pre-2.0 this did self.tz_localize(dtype.tz), which did not match
# the Series behavior which did
# values.tz_localize("UTC").tz_convert(dtype.tz)
raise TypeError(
"Cannot use .astype to convert from timezone-naive dtype to "
"timezone-aware dtype. Use obj.tz_localize instead or "
"series.dt.tz_localize instead"
)
# GH#49281 treat as wall times, consistent with the
# Series(dt64_values, dtype=tzaware_dtype) constructor.
return self.tz_localize(dtype.tz)
else:
# tzaware unit conversion e.g. datetime64[s, UTC]
np_dtype = np.dtype(dtype.str)
Expand Down
14 changes: 4 additions & 10 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,19 +384,13 @@ def test_astype_copies(self, dtype, other):
ser = pd.Series([1, 2], dtype=dtype)
orig = ser.copy()

err = False
if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"):
# deprecated in favor of tz_localize
err = True

if err:
if dtype == "datetime64[ns]":
msg = "Use obj.tz_localize instead or series.dt.tz_localize instead"
else:
msg = "from timezone-aware dtype to timezone-naive dtype"
if dtype != "datetime64[ns]" and other == "datetime64[ns]":
# tz-aware to tz-naive still raises
msg = "from timezone-aware dtype to timezone-naive dtype"
with pytest.raises(TypeError, match=msg):
ser.astype(other)
else:
# GH#49281 tz-naive to tz-aware now does tz_localize
t = ser.astype(other)
t[:] = pd.NaT
tm.assert_series_equal(ser, orig)
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1549,6 +1549,45 @@ def test_astype_errors_ignore():
tm.assert_frame_equal(result, expected)


def test_astype_pyarrow_tznaive_to_tzaware():
    # GH#49281: casting a tz-naive pyarrow timestamp to a tz-aware one must
    # localize the wall times (tz_localize semantics) rather than interpret
    # the values as UTC and convert, i.e. tz_localize("UTC").tz_convert(tz).
    naive_dates = pd.date_range("2020-01-01", periods=3, freq="D")
    aware_dates = pd.date_range("2020-01-01", periods=3, freq="D", tz="US/Eastern")

    naive_ser = pd.Series(naive_dates, dtype="timestamp[ns][pyarrow]")
    result = naive_ser.astype("timestamp[ns, US/Eastern][pyarrow]")

    expected = pd.Series(aware_dates, dtype="timestamp[ns, tz=US/Eastern][pyarrow]")
    tm.assert_series_equal(result, expected)


def test_astype_cross_family_tznaive_to_tzaware():
    # GH#49281: astype between the numpy-backed and pyarrow-backed families
    # must also localize tz-naive values as wall times (tz_localize semantics).
    naive_dates = pd.date_range("2020-01-01", periods=3, freq="D")
    aware_dates = pd.date_range("2020-01-01", periods=3, freq="D", tz="US/Eastern")

    # numpy dt64 tz-naive -> pyarrow tz-aware
    np_ser = pd.Series(naive_dates)
    result = np_ser.astype("timestamp[ns, US/Eastern][pyarrow]")
    expected = pd.Series(aware_dates, dtype="timestamp[ns, tz=US/Eastern][pyarrow]")
    tm.assert_series_equal(result, expected)

    # pyarrow tz-naive -> numpy dt64 tz-aware
    pa_ser = pd.Series(naive_dates, dtype="timestamp[ns][pyarrow]")
    result = pa_ser.astype("datetime64[ns, US/Eastern]")
    expected = pd.Series(aware_dates, dtype="datetime64[ns, US/Eastern]")
    tm.assert_series_equal(result, expected)


def test_to_numpy_with_defaults(data, using_nan_is_na):
# GH49973
result = data.to_numpy()
Expand Down
17 changes: 9 additions & 8 deletions pandas/tests/indexes/datetimes/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,17 @@ def test_astype_tzaware_to_tzaware(self):
assert result.freq == expected.freq

def test_astype_tznaive_to_tzaware(self):
    # GH 18951, GH#49281: astype from tz-naive to tz-aware localizes the
    # values as wall times, i.e. it gives the same result as tz_localize.
    # The earlier expectation that this raises TypeError no longer applies.
    idx = date_range("20170101", periods=4)
    idx = idx._with_freq(None)  # tz_localize does not preserve freq
    expected = idx.tz_localize("US/Eastern")

    # Index-level astype
    result = idx.astype("datetime64[ns, US/Eastern]")
    tm.assert_index_equal(result, expected)

    # The underlying DatetimeArray must behave the same way
    result = idx._data.astype("datetime64[ns, US/Eastern]")
    expected_dta = expected._data
    tm.assert_datetime_array_equal(result, expected_dta)

def test_astype_str_nat(self, using_infer_string):
# GH 13149, GH 13209
Expand Down
22 changes: 6 additions & 16 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,6 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
index = index.tz_localize(tz_naive_fixture)
dtype = index.dtype

# As of 2.0 astype raises on dt64.astype(dt64tz)
err = tz_naive_fixture is not None
msg = "Cannot use .astype to convert from timezone-naive dtype to"

if attr == "asi8":
result = DatetimeIndex(arg).tz_localize(tz_naive_fixture)
tm.assert_index_equal(result, index)
Expand All @@ -274,12 +270,9 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
tm.assert_index_equal(result, index)

if attr == "asi8":
if err:
with pytest.raises(TypeError, match=msg):
DatetimeIndex(arg).astype(dtype)
else:
result = DatetimeIndex(arg).astype(dtype)
tm.assert_index_equal(result, index)
# GH#49281 astype now does tz_localize
result = DatetimeIndex(arg).astype(dtype)
tm.assert_index_equal(result, index)
else:
result = klass(arg, dtype=dtype)
tm.assert_index_equal(result, index)
Expand All @@ -295,12 +288,9 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
tm.assert_index_equal(result, index)

if attr == "asi8":
if err:
with pytest.raises(TypeError, match=msg):
DatetimeIndex(list(arg)).astype(dtype)
else:
result = DatetimeIndex(list(arg)).astype(dtype)
tm.assert_index_equal(result, index)
# GH#49281 astype now does tz_localize
result = DatetimeIndex(list(arg)).astype(dtype)
tm.assert_index_equal(result, index)
else:
result = klass(list(arg), dtype=dtype)
tm.assert_index_equal(result, index)
Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,14 +273,14 @@ def test_astype_datetime64tz(self):
tm.assert_series_equal(result, expected)

# astype - datetime64[ns, tz]
msg = "Cannot use .astype to convert from timezone-naive"
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz astype deprecated
Series(ser.values).astype("datetime64[ns, US/Eastern]")
# GH#49281 tz_localize semantics (treat as wall times)
result = Series(ser.values).astype("datetime64[ns, US/Eastern]")
expected = Series(ser.values).dt.tz_localize("US/Eastern")
tm.assert_series_equal(result, expected)

with pytest.raises(TypeError, match=msg):
# dt64->dt64tz astype deprecated
Series(ser.values).astype(ser.dtype)
result = Series(ser.values).astype(ser.dtype)
expected = Series(ser.values).dt.tz_localize("US/Eastern")
tm.assert_series_equal(result, expected)

result = ser.astype("datetime64[ns, CET]")
expected = Series(
Expand Down
12 changes: 0 additions & 12 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import numpy as np
import pytest

from pandas._libs import lib
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -184,17 +183,6 @@ def test_convert_dtypes(
using_infer_string,
using_nan_is_na,
):
if (
hasattr(data, "dtype")
and lib.is_np_dtype(data.dtype, "M")
and isinstance(maindtype, pd.DatetimeTZDtype)
):
# this astype is deprecated in favor of tz_localize
msg = "Cannot use .astype to convert from timezone-naive dtype"
with pytest.raises(TypeError, match=msg):
pd.Series(data, dtype=maindtype)
return

if maindtype is not None:
series = pd.Series(data, dtype=maindtype)
else:
Expand Down
Loading