diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 0cbb3db24c946..db5c7bcb301d9 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -267,6 +267,9 @@ Indexing - Bug in :meth:`Index.get_level_values` mishandling boolean, NA-like (``np.nan``, ``pd.NA``, ``pd.NaT``) and integer index names (:issue:`62169`) - Bug in :meth:`Index.get_loc` raising ``KeyError`` when looking up a tuple in an object-dtype :class:`Index` with duplicates (:issue:`37800`) - Bug in :meth:`Index.insert` silently casting booleans to numeric when used with nullable numeric dtypes like ``Float64`` or ``Int64`` (:issue:`61709`) +- Bug in :meth:`DataFrame.loc` corrupting unrelated columns when assigning with a column indexer that contains both duplicate labels and new column labels (:issue:`58317`) +- + Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0c14c9c925803..8bccee80d7415 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -913,8 +913,7 @@ def _ensure_listlike_indexer(self, key, axis=None) -> None: # below would create float64 columns in this example, which # would successfully hold 7, so we would end up with the wrong # dtype. 
- indexer = np.arange(len(keys), dtype=np.intp) - indexer[len(self.obj.columns) :] = -1 + indexer = self.obj.columns.get_indexer(keys) new_mgr = self.obj._mgr.reindex_indexer( keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ef3d4797fc82a..a5898f3f52183 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2398,6 +2398,28 @@ def test_loc_setitem_with_expansion_new_row_and_new_columns(self): ) tm.assert_frame_equal(df, expected) + def test_loc_setitem_with_expansion_duplicate_columns(self): + # GH#58317 + df = DataFrame( + [[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], + columns=["D", "B", "C", "A"], + ) + item = DataFrame( + [[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], + columns=["A", "B", "C", "X"], + index=[3, 2, 1], + ) + df.loc[[True, False, True], ["B", "E", "B"]] = item + expected = DataFrame( + [ + [1, np.nan, np.nan, 3, 4, np.nan], + [4, 5.0, 5.0, 6, 7, np.nan], + [7, 5.0, 5.0, 9, 10, np.nan], + ], + columns=["D", "B", "B", "C", "A", "E"], + ) + tm.assert_frame_equal(df, expected) + class TestLocCallable: def test_frame_loc_getitem_callable(self):