Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ Performance improvements
- Performance improvement in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc`
setitem with a 2D list-of-lists value by avoiding a wasteful round-trip
through an intermediate object array (:issue:`64229`).
- Performance improvement in :meth:`Series.reindex` and :meth:`DataFrame.reindex` for non-nanosecond ``datetime64`` and ``timedelta64`` dtypes (:issue:`24566`).
- Performance improvement in :meth:`Series.iloc` and :meth:`DataFrame.iloc`
when setting datetimelike values into object-dtype data with list-like
indexers (:issue:`64250`).
Expand Down
26 changes: 19 additions & 7 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ def take_2d_multi(
if func is not None:
func = _convert_wrapper(func, out.dtype)

# datetime64/timedelta64 of any resolution use int64 storage
if func is None and arr.dtype.kind in "mM" and arr.dtype == out.dtype:
func = _view_wrapper(
libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64
)

if mask_info is not None:
_, (row_needs, col_needs) = mask_info
needs_fill = row_needs or col_needs
Expand Down Expand Up @@ -299,6 +305,18 @@ def _get_take_nd_function_cached(
func = _convert_wrapper(func, out_dtype)
return func

# datetime64/timedelta64 of any resolution use int64 storage;
# the dispatch dicts only have entries for ns resolution, so handle
# other resolutions here.
if arr_dtype.kind in "mM" and arr_dtype == out_dtype:
if ndim == 1:
base_func = libalgos.take_1d_int64_int64
elif axis == 0:
base_func = libalgos.take_2d_axis0_int64_int64
else:
base_func = libalgos.take_2d_axis1_int64_int64
return _view_wrapper(base_func, np.int64, np.int64, fill_wrap=np.int64)

return None


Expand Down Expand Up @@ -342,13 +360,7 @@ def wrapper(
if out_dtype is not None:
out = out.view(out_dtype)
if fill_wrap is not None:
# FIXME: if we get here with dt64/td64 we need to be sure we have
# matching resos
if fill_value.dtype.kind == "m":
fill_value = fill_value.astype("m8[ns]")
else:
fill_value = fill_value.astype("M8[ns]")
fill_value = fill_wrap(fill_value)
fill_value = fill_value.view("i8")

f(arr, indexer, out, fill_value=fill_value, allow_fill=allow_fill)

Expand Down
53 changes: 51 additions & 2 deletions pandas/tests/test_take.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas import array
import pandas._testing as tm
import pandas.core.algorithms as algos
from pandas.core.array_algos.take import _get_take_nd_function_cached


@pytest.fixture(
Expand Down Expand Up @@ -150,12 +151,60 @@ def test_1d_unsigned_int_uses_cython_path(self, dtype):
# GH#????? - _take_1d_dict had wrong keys for uint16/uint32/uint64,
# causing fallback to the slow object path instead of the fast
# Cython path. Verify the optimized function is found.
from pandas.core.array_algos.take import _get_take_nd_function_cached

arr_dtype = np.dtype(dtype)
func = _get_take_nd_function_cached(1, arr_dtype, arr_dtype, 0)
assert func is not None

@pytest.mark.parametrize(
    "dtype",
    [
        "datetime64[us]",
        "datetime64[ms]",
        "datetime64[s]",
        "timedelta64[us]",
        "timedelta64[ms]",
        "timedelta64[s]",
    ],
)
@pytest.mark.parametrize("ndim", [1, 2])
def test_non_ns_datetime_timedelta_uses_cython_path(self, dtype, ndim):
    # Non-nanosecond datetime64/timedelta64 dtypes must resolve to a take
    # function for matching input and output dtypes; a ``None`` result
    # would mean no specialised function was found for that resolution.
    # NOTE(review): the trailing ``0`` is presumably the axis argument --
    # confirm against the signature of _get_take_nd_function_cached.
    non_ns = np.dtype(dtype)
    assert _get_take_nd_function_cached(ndim, non_ns, non_ns, 0) is not None

@pytest.mark.parametrize(
    "dtype",
    [
        "datetime64[us]",
        "datetime64[ms]",
        "datetime64[s]",
        "timedelta64[us]",
        "timedelta64[ms]",
        "timedelta64[s]",
    ],
)
def test_1d_non_ns_datetime_timedelta(self, dtype):
    # 1-D take_nd on non-nanosecond datetime64/timedelta64 data must keep
    # the input dtype and put NaT wherever the indexer is -1.
    # The resolutions mirror the ones checked by the dispatch-lookup test
    # (us, ms, s) so every unit that is looked up is also verified
    # end-to-end.
    arr = np.arange(5, dtype="i8").view(dtype)
    indexer = np.array([3, 1, 0, -1], dtype=np.intp)

    result = algos.take_nd(arr, indexer)

    # Build the expected array with a valid placeholder index in the last
    # slot, then overwrite that slot with the NaT sentinel through an
    # int64 view of the same memory.
    expected = arr.take([3, 1, 0, 0])
    expected.view("i8")[-1] = iNaT
    tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize(
    "dtype",
    [
        "datetime64[us]",
        "datetime64[ms]",
        "datetime64[s]",
        "timedelta64[us]",
        "timedelta64[ms]",
        "timedelta64[s]",
    ],
)
def test_2d_non_ns_datetime_timedelta(self, dtype):
    # 2-D take_nd on non-nanosecond datetime64/timedelta64 data, along
    # both axes, must keep the input dtype and put NaT wherever the
    # indexer is -1.
    # The resolutions mirror the ones checked by the dispatch-lookup test
    # (us, ms, s) so every unit that is looked up is also verified
    # end-to-end.
    arr = np.arange(15, dtype="i8").reshape(5, 3).view(dtype)
    indexer = np.array([0, 2, -1, 1, -1], dtype=np.intp)

    # axis=0: positions 2 and 4 of the indexer are -1, so those output
    # rows are expected to be all-NaT. ndarray.take reads -1 as "last
    # row", so the placeholder rows are overwritten via an int64 view.
    result = algos.take_nd(arr, indexer, axis=0)
    expected = arr.take(indexer, axis=0)
    expected.view("i8")[[2, 4], :] = iNaT
    tm.assert_numpy_array_equal(result, expected)

    # axis=1: only the first three indexer entries are used (the array
    # has three columns); the -1 in position 2 yields an all-NaT column.
    result = algos.take_nd(arr, indexer[:3], axis=1)
    expected = arr.take(indexer[:3], axis=1)
    expected.view("i8")[:, 2] = iNaT
    tm.assert_numpy_array_equal(result, expected)

def test_1d_other_dtypes(self):
arr = np.random.default_rng(2).standard_normal(10).astype(np.float32)

Expand Down
Loading