diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 15aa773131c56..ba9625454db35 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -70,7 +70,6 @@
     ExtensionDtype,
     PeriodDtype,
 )
-from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import (
@@ -146,27 +145,9 @@ def f(self):
         values = self._local_timestamps()
 
         if field in self._bool_ops:
-            result: np.ndarray
-
             if field.endswith(("start", "end")):
-                freq = self.freq
-                month_kw = 12
-                if freq:
-                    kwds = freq.kwds
-                    month_kw = kwds.get("startingMonth", kwds.get("month", month_kw))
-
-                if freq is not None:
-                    freq_name = freq.rule_code
-                else:
-                    freq_name = None
-                result = fields.get_start_end_field(
-                    values, field, freq_name, month_kw, reso=self._creso
-                )
-            else:
-                result = fields.get_date_field(values, field, reso=self._creso)
-
-            # these return a boolean by-definition
-            return result
+                return self._get_start_end_field(field, freq=None)
+            return fields.get_date_field(values, field, reso=self._creso)
 
         result = fields.get_date_field(values, field, reso=self._creso)
         result = self._maybe_mask_results(result, fill_value=None, convert="float64")
@@ -972,10 +953,7 @@ def tz_convert(self, tz) -> Self:
 
         # No conversion since timestamps are all UTC to begin with
         dtype = tz_to_dtype(tz, unit=self.unit)
-        new_freq = None
-        if isinstance(self.freq, Tick):
-            new_freq = self.freq
-        return self._simple_new(self._ndarray, dtype=dtype, freq=new_freq)
+        return self._simple_new(self._ndarray, dtype=dtype, freq=None)
 
     @dtl.ravel_compat
     def tz_localize(
@@ -1152,15 +1130,7 @@ def tz_localize(
         new_dates_dt64 = new_dates.view(f"M8[{self.unit}]")
         dtype = tz_to_dtype(tz, unit=self.unit)
 
-        freq = None
-        if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates_dt64[0])):
-            # we can preserve freq
-            # TODO: Also for fixed-offsets
-            freq = self.freq
-        elif tz is None and self.tz is None:
-            # no-op
-            freq = self.freq
-        return self._simple_new(new_dates_dt64, dtype=dtype, freq=freq)
+        return self._simple_new(new_dates_dt64, dtype=dtype, freq=None)
 
     # ----------------------------------------------------------------
     # Conversion Methods - Vectorized analogues of Timestamp methods
@@ -1236,7 +1206,6 @@ def normalize(self) -> Self:
         dt64_values = new_values.view(self._ndarray.dtype)
 
         dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
-        dta = dta._with_freq("infer")
         if self.tz is not None:
             dta = dta.tz_localize(self.tz)
         return dta
@@ -2166,6 +2135,30 @@ def daysinmonth(self):
         )
         return self.days_in_month
 
+    def _get_start_end_field(self, field: str, freq: BaseOffset | None) -> np.ndarray:
+        """
+        Return boolean array for a start/end field, e.g. is_month_start.
+
+        Parameters
+        ----------
+        field : str
+        freq : BaseOffset or None
+        """
+        month_kw = 12
+        if freq:
+            kwds = freq.kwds
+            month_kw = kwds.get("startingMonth", kwds.get("month", month_kw))
+
+        if freq is not None:
+            freq_name = freq.rule_code
+        else:
+            freq_name = None
+
+        values = self._local_timestamps()
+        return fields.get_start_end_field(
+            values, field, freq_name, month_kw, reso=self._creso
+        )
+
     _is_month_doc = """
         Indicates whether the date is the {first_or_last} day of the month.
 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 8da7b10fa1304..2e83d2836fc2f 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -116,7 +116,8 @@ def _new_DatetimeIndex(cls, d):
     [
         method
         for method in DatetimeArray._datetimelike_methods
-        if method not in ("tz_localize", "tz_convert", "strftime")
+        if method
+        not in ("tz_localize", "tz_convert", "normalize", "to_period", "strftime")
     ],
     DatetimeArray,
     wrap=True,
@@ -373,27 +374,27 @@ def nanosecond(self) -> Index:
 
     @property
     def is_month_start(self) -> npt.NDArray[np.bool_]:
-        return self._data.is_month_start
+        return self._data._get_start_end_field("is_month_start", self.freq)
 
     @property
     def is_month_end(self) -> npt.NDArray[np.bool_]:
-        return self._data.is_month_end
+        return self._data._get_start_end_field("is_month_end", self.freq)
 
     @property
     def is_quarter_start(self) -> npt.NDArray[np.bool_]:
-        return self._data.is_quarter_start
+        return self._data._get_start_end_field("is_quarter_start", self.freq)
 
     @property
     def is_quarter_end(self) -> npt.NDArray[np.bool_]:
-        return self._data.is_quarter_end
+        return self._data._get_start_end_field("is_quarter_end", self.freq)
 
     @property
     def is_year_start(self) -> npt.NDArray[np.bool_]:
-        return self._data.is_year_start
+        return self._data._get_start_end_field("is_year_start", self.freq)
 
     @property
     def is_year_end(self) -> npt.NDArray[np.bool_]:
-        return self._data.is_year_end
+        return self._data._get_start_end_field("is_year_end", self.freq)
 
     @property
     def is_leap_year(self) -> npt.NDArray[np.bool_]:
@@ -448,6 +449,49 @@ def strftime(self, date_format) -> Index:
         arr = self._data.strftime(date_format)
         return Index(arr, name=self.name, dtype=arr.dtype, copy=False)
 
+    def normalize(self) -> Self:
+        """
+        Convert times to midnight.
+
+        The time component of the date-time is converted to midnight i.e.
+        00:00:00. This is useful in cases when the time does not matter.
+        Length is unaltered. The timezones are unaffected.
+
+        This method is available on Series with datetime values under
+        the ``.dt`` accessor, and directly on Datetime Array/Index.
+
+        Returns
+        -------
+        DatetimeArray, DatetimeIndex or Series
+            The same type as the original data. Series will have the same
+            name and index. DatetimeIndex will have the same name.
+
+        See Also
+        --------
+        floor : Floor the datetimes to the specified freq.
+        ceil : Ceil the datetimes to the specified freq.
+        round : Round the datetimes to the specified freq.
+
+        Examples
+        --------
+        >>> idx = pd.date_range(
+        ...     start="2014-08-01 10:00", freq="h", periods=3, tz="Asia/Calcutta"
+        ... )
+        >>> idx
+        DatetimeIndex(['2014-08-01 10:00:00+05:30',
+                       '2014-08-01 11:00:00+05:30',
+                       '2014-08-01 12:00:00+05:30'],
+                       dtype='datetime64[us, Asia/Calcutta]', freq='h')
+        >>> idx.normalize()
+        DatetimeIndex(['2014-08-01 00:00:00+05:30',
+                       '2014-08-01 00:00:00+05:30',
+                       '2014-08-01 00:00:00+05:30'],
+                       dtype='datetime64[us, Asia/Calcutta]', freq=None)
+        """
+        arr = self._data.normalize()
+        arr = arr._with_freq("infer")
+        return type(self)._simple_new(arr, name=self.name)
+
     def tz_convert(self, tz) -> Self:
         """
         Convert tz-aware Datetime Array/Index from one time zone to another.
@@ -520,6 +564,9 @@ def tz_convert(self, tz) -> Self:
                       dtype='datetime64[us]', freq='h')
         """  # noqa: E501
         arr = self._data.tz_convert(tz)
+        freq = self._data.freq
+        if isinstance(freq, Tick):
+            arr._freq = freq
         return type(self)._simple_new(arr, name=self.name, refs=self._references)
 
     def tz_localize(
@@ -667,7 +714,15 @@ def tz_localize(
         1   2015-03-29 03:30:00+02:00
         dtype: datetime64[ns, Europe/Warsaw]
         """  # noqa: E501
+        freq = self._data.freq
         arr = self._data.tz_localize(tz, ambiguous, nonexistent)
+        if timezones.is_utc(arr.tz) or (len(arr) == 1 and arr[0] is not NaT):
+            # UTC result or a single non-NaT element: we can preserve freq
+            # TODO: Also for fixed-offsets
+            arr._freq = freq
+        elif arr.tz is None and self._data.tz is None:
+            # no-op: tz-naive before and after, so keep the original freq
+            arr._freq = freq
         return type(self)._simple_new(arr, name=self.name)
 
     def to_period(self, freq=None) -> PeriodIndex:
@@ -721,8 +776,22 @@ def to_period(self, freq=None) -> PeriodIndex:
         PeriodIndex(['2017-01-01', '2017-01-02'],
                     dtype='period[D]')
         """
+        from pandas.core.dtypes.dtypes import PeriodDtype
+        from pandas.core.indexes.api import PeriodIndex
+        from pandas.tseries.frequencies import get_period_alias
+
+        if freq is None:
+            dt_freq = self._data.freq
+            freq = self.freqstr
+            if dt_freq is not None and hasattr(dt_freq, "_period_dtype_code"):
+                freq = PeriodDtype(dt_freq)._freqstr
+
+            if freq is not None:
+                res = get_period_alias(freq)
+                if res is not None:
+                    freq = res
         arr = self._data.to_period(freq)
         return PeriodIndex._simple_new(arr, name=self.name)
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 120d3fdf52ac2..705b985c86d0c 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -2795,6 +2795,9 @@ def test_to_datetime_dta_tz(self, klass):
         expected = klass(expected)
 
         result = to_datetime(obj, utc=True)
+        if klass is not DatetimeIndex:
+            # Array methods no longer set freq; freq is managed by Index
+            expected = expected._with_freq(None)
         tm.assert_equal(result, expected)
 
 