From 54625381b35372e973b9ba54298cbb4277117db7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 15 May 2021 21:19:15 +0100 Subject: [PATCH 1/3] deprecate default arguments as positional in reset_index --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/frame.py | 2 ++ pandas/core/generic.py | 2 +- pandas/core/series.py | 2 ++ pandas/tests/frame/methods/test_droplevel.py | 2 +- .../tests/frame/methods/test_reset_index.py | 29 +++++++++++++------ pandas/tests/groupby/test_grouping.py | 2 +- .../merge/test_merge_index_as_string.py | 4 +-- .../tests/series/methods/test_reset_index.py | 8 ++--- 9 files changed, 34 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 622029adf357f..18b3088088b1e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -647,6 +647,7 @@ Deprecations - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) +- Deprecated passing arguments as positional in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` (:issue:`41485`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2941b6ac01904..759a32131bb21 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,6 +77,7 @@ Appender, Substitution, deprecate_kwarg, + deprecate_nonkeyword_arguments, doc, rewrite_axis_style_signature, ) @@ -5567,6 +5568,7 @@ def reset_index( ) -> DataFrame | None: ... 
+ @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) def reset_index( self, level: Hashable | Sequence[Hashable] | None = None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a09cc0a6324c0..2c3dc8fefd113 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1847,7 +1847,7 @@ def _drop_labels_or_levels(self, keys, axis: int = 0): if axis == 0: # Handle dropping index levels if levels_to_drop: - dropped.reset_index(levels_to_drop, drop=True, inplace=True) + dropped.reset_index(level=levels_to_drop, drop=True, inplace=True) # Handle dropping columns labels if labels_to_drop: diff --git a/pandas/core/series.py b/pandas/core/series.py index c8e9898f9462a..788bc8dc1f6e0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -51,6 +51,7 @@ from pandas.util._decorators import ( Appender, Substitution, + deprecate_nonkeyword_arguments, doc, ) from pandas.util._validators import ( @@ -1275,6 +1276,7 @@ def repeat(self, repeats, axis=None) -> Series: self, method="repeat" ) + @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) def reset_index(self, level=None, drop=False, name=None, inplace=False): """ Generate a new DataFrame or Series with the index reset. 
diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py index e1302d4b73f2b..c3be0a03ecf3e 100644 --- a/pandas/tests/frame/methods/test_droplevel.py +++ b/pandas/tests/frame/methods/test_droplevel.py @@ -20,7 +20,7 @@ def test_droplevel(self, frame_or_series): df = df.iloc[:, 0] # test that dropping of a level in index works - expected = df.reset_index("a", drop=True) + expected = df.reset_index(level="a", drop=True) result = df.droplevel("a", axis="index") tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 5a87803ddc21e..7ca046fbfd278 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -142,18 +142,18 @@ def test_reset_index(self, float_frame): # only remove certain columns df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index(["A", "B"]) + rs = df.reset_index(level=["A", "B"]) # TODO should reset_index check_names ? 
tm.assert_frame_equal(rs, float_frame, check_names=False) - rs = df.reset_index(["index", "A", "B"]) + rs = df.reset_index(level=["index", "A", "B"]) tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - rs = df.reset_index(["index", "A", "B"]) + rs = df.reset_index(level=["index", "A", "B"]) tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - rs = df.reset_index("A") + rs = df.reset_index(level="A") xp = float_frame.reset_index().set_index(["index", "B"]) tm.assert_frame_equal(rs, xp, check_names=False) @@ -165,7 +165,7 @@ def test_reset_index(self, float_frame): tm.assert_frame_equal(df, reset, check_names=False) df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index("A", drop=True) + rs = df.reset_index(level="A", drop=True) xp = float_frame.copy() del xp["A"] xp = xp.set_index(["B"], append=True) @@ -262,7 +262,7 @@ def test_reset_index_multiindex_col(self): MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), columns=[["b", "b", "c"], ["mean", "median", "mean"]], ) - rs = df.reset_index("a") + rs = df.reset_index(level="a") xp = DataFrame( full, Index([0, 1, 2], name="d"), @@ -270,7 +270,7 @@ def test_reset_index_multiindex_col(self): ) tm.assert_frame_equal(rs, xp) - rs = df.reset_index("a", col_fill=None) + rs = df.reset_index(level="a", col_fill=None) xp = DataFrame( full, Index(range(3), name="d"), @@ -278,7 +278,7 @@ def test_reset_index_multiindex_col(self): ) tm.assert_frame_equal(rs, xp) - rs = df.reset_index("a", col_fill="blah", col_level=1) + rs = df.reset_index(level="a", col_fill="blah", col_level=1) xp = DataFrame( full, Index(range(3), name="d"), @@ -665,9 +665,20 @@ def test_reset_index_multiindex_nat(): tstamp = date_range("2015-07-01", freq="D", periods=3) df = DataFrame({"id": idx, "tstamp": tstamp, "a": list("abc")}) df.loc[2, "tstamp"] = pd.NaT - result = df.set_index(["id", "tstamp"]).reset_index("id") + result = df.set_index(["id", 
"tstamp"]).reset_index(level="id") expected = DataFrame( {"id": range(3), "a": list("abc")}, index=pd.DatetimeIndex(["2015-07-01", "2015-07-02", "NaT"], name="tstamp"), ) tm.assert_frame_equal(result, expected) + + +def test_drop_pos_args_deprecation(): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}).set_index("a") + msg = ( + r"Starting with Pandas version 2\.0 all arguments of reset_index except for " + r"the argument 'self' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.reset_index("a") diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 3d02e784d83b0..00bb667d6507a 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -291,7 +291,7 @@ def test_grouper_column_and_index(self): # Grouping a single-index frame by a column and the index should # be equivalent to resetting the index and grouping by two columns - df_single = df_multi.reset_index("outer") + df_single = df_multi.reset_index(level="outer") result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() expected = df_single.reset_index().groupby(["B", "inner"]).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py index c3e0a92850c07..5602a7445d9d6 100644 --- a/pandas/tests/reshape/merge/test_merge_index_as_string.py +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -96,11 +96,11 @@ def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, ho # Drop index levels that aren't involved in the merge drop_left = [n for n in left_levels if n not in left_on] if drop_left: - df_left = df_left.reset_index(drop_left, drop=True) + df_left = df_left.reset_index(level=drop_left, drop=True) drop_right = [n for n in right_levels if n not in right_on] if drop_right: - df_right = 
df_right.reset_index(drop_right, drop=True) + df_right = df_right.reset_index(level=drop_right, drop=True) # Convert remaining index levels to columns reset_left = [n for n in left_levels if n in left_on] diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 70b9c9c9dc7d7..ccc454c9b11be 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -107,7 +107,7 @@ def test_reset_index_level(self): s.reset_index(level=[0, 1, 2]) # Check that .reset_index([],drop=True) doesn't fail - result = Series(range(4)).reset_index([], drop=True) + result = Series(range(4)).reset_index(level=[], drop=True) expected = Series(range(4)) tm.assert_series_equal(result, expected) @@ -127,14 +127,14 @@ def test_reset_index_drop_errors(self): # KeyError raised for series index when passed level name is missing s = Series(range(4)) with pytest.raises(KeyError, match="does not match index name"): - s.reset_index("wrong", drop=True) + s.reset_index(level="wrong", drop=True) with pytest.raises(KeyError, match="does not match index name"): - s.reset_index("wrong") + s.reset_index(level="wrong") # KeyError raised for series when level to be dropped is missing s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) with pytest.raises(KeyError, match="not found"): - s.reset_index("wrong", drop=True) + s.reset_index(level="wrong", drop=True) def test_reset_index_with_drop(self, series_with_multilevel_index): ser = series_with_multilevel_index From 862addfe367696ee8bafe95d9f7b253063caad14 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 16 May 2021 10:01:15 +0100 Subject: [PATCH 2/3] update example from cookbook --- doc/source/user_guide/cookbook.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index e1aae0fd481b1..7e8ac3e1b1d5e 100644 --- 
a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -312,7 +312,7 @@ The :ref:`multindexing <advanced.hierarchical>` docs. df.columns = pd.MultiIndex.from_tuples([tuple(c.split("_")) for c in df.columns]) df # Now stack & Reset - df = df.stack(0).reset_index(1) + df = df.stack(0).reset_index(level=1) df # And fix the labels (Notice the label 'level_1' got added automatically) df.columns = ["Sample", "All_X", "All_Y"] From 10fbed695e2a527741fdedd869b1cf6b4ffff010 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 17 May 2021 21:57:35 +0100 Subject: [PATCH 3/3] allow level to be passed positionally --- doc/source/user_guide/cookbook.rst | 2 +- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/series.py | 2 +- pandas/tests/frame/methods/test_droplevel.py | 2 +- .../tests/frame/methods/test_reset_index.py | 24 ++++++++++--------- pandas/tests/groupby/test_grouping.py | 2 +- .../merge/test_merge_index_as_string.py | 4 ++-- .../tests/series/methods/test_reset_index.py | 20 ++++++++++++---- 10 files changed, 38 insertions(+), 24 deletions(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 7e8ac3e1b1d5e..e1aae0fd481b1 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -312,7 +312,7 @@ The :ref:`multindexing <advanced.hierarchical>` docs. 
df.columns = pd.MultiIndex.from_tuples([tuple(c.split("_")) for c in df.columns]) df # Now stack & Reset - df = df.stack(0).reset_index(level=1) + df = df.stack(0).reset_index(1) df # And fix the labels (Notice the label 'level_1' got added automatically) df.columns = ["Sample", "All_X", "All_Y"] diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 18b3088088b1e..8b7758818b2c4 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -647,7 +647,7 @@ Deprecations - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) -- Deprecated passing arguments as positional in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` (:issue:`41485`) +- Deprecated passing arguments as positional (except for ``"level"``) in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` (:issue:`41485`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 759a32131bb21..306b650432643 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5568,7 +5568,7 @@ def reset_index( ) -> DataFrame | None: ... 
- @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) + @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self", "level"]) def reset_index( self, level: Hashable | Sequence[Hashable] | None = None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2c3dc8fefd113..a09cc0a6324c0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1847,7 +1847,7 @@ def _drop_labels_or_levels(self, keys, axis: int = 0): if axis == 0: # Handle dropping index levels if levels_to_drop: - dropped.reset_index(level=levels_to_drop, drop=True, inplace=True) + dropped.reset_index(levels_to_drop, drop=True, inplace=True) # Handle dropping columns labels if labels_to_drop: diff --git a/pandas/core/series.py b/pandas/core/series.py index 788bc8dc1f6e0..c98ae20668ad3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1276,7 +1276,7 @@ def repeat(self, repeats, axis=None) -> Series: self, method="repeat" ) - @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) + @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self", "level"]) def reset_index(self, level=None, drop=False, name=None, inplace=False): """ Generate a new DataFrame or Series with the index reset. 
diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py index c3be0a03ecf3e..e1302d4b73f2b 100644 --- a/pandas/tests/frame/methods/test_droplevel.py +++ b/pandas/tests/frame/methods/test_droplevel.py @@ -20,7 +20,7 @@ def test_droplevel(self, frame_or_series): df = df.iloc[:, 0] # test that dropping of a level in index works - expected = df.reset_index(level="a", drop=True) + expected = df.reset_index("a", drop=True) result = df.droplevel("a", axis="index") tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 7ca046fbfd278..e2f255ceb546d 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -142,18 +142,18 @@ def test_reset_index(self, float_frame): # only remove certain columns df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index(level=["A", "B"]) + rs = df.reset_index(["A", "B"]) # TODO should reset_index check_names ? 
tm.assert_frame_equal(rs, float_frame, check_names=False) - rs = df.reset_index(level=["index", "A", "B"]) + rs = df.reset_index(["index", "A", "B"]) tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - rs = df.reset_index(level=["index", "A", "B"]) + rs = df.reset_index(["index", "A", "B"]) tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - rs = df.reset_index(level="A") + rs = df.reset_index("A") xp = float_frame.reset_index().set_index(["index", "B"]) tm.assert_frame_equal(rs, xp, check_names=False) @@ -165,7 +165,7 @@ def test_reset_index(self, float_frame): tm.assert_frame_equal(df, reset, check_names=False) df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index(level="A", drop=True) + rs = df.reset_index("A", drop=True) xp = float_frame.copy() del xp["A"] xp = xp.set_index(["B"], append=True) @@ -262,7 +262,7 @@ def test_reset_index_multiindex_col(self): MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), columns=[["b", "b", "c"], ["mean", "median", "mean"]], ) - rs = df.reset_index(level="a") + rs = df.reset_index("a") xp = DataFrame( full, Index([0, 1, 2], name="d"), @@ -270,7 +270,7 @@ def test_reset_index_multiindex_col(self): ) tm.assert_frame_equal(rs, xp) - rs = df.reset_index(level="a", col_fill=None) + rs = df.reset_index("a", col_fill=None) xp = DataFrame( full, Index(range(3), name="d"), @@ -278,7 +278,7 @@ def test_reset_index_multiindex_col(self): ) tm.assert_frame_equal(rs, xp) - rs = df.reset_index(level="a", col_fill="blah", col_level=1) + rs = df.reset_index("a", col_fill="blah", col_level=1) xp = DataFrame( full, Index(range(3), name="d"), @@ -665,7 +665,7 @@ def test_reset_index_multiindex_nat(): tstamp = date_range("2015-07-01", freq="D", periods=3) df = DataFrame({"id": idx, "tstamp": tstamp, "a": list("abc")}) df.loc[2, "tstamp"] = pd.NaT - result = df.set_index(["id", "tstamp"]).reset_index(level="id") + result = df.set_index(["id", 
"tstamp"]).reset_index("id") expected = DataFrame( {"id": range(3), "a": list("abc")}, index=pd.DatetimeIndex(["2015-07-01", "2015-07-02", "NaT"], name="tstamp"), @@ -678,7 +678,9 @@ def test_drop_pos_args_deprecation(): df = DataFrame({"a": [1, 2, 3]}).set_index("a") msg = ( r"Starting with Pandas version 2\.0 all arguments of reset_index except for " - r"the argument 'self' will be keyword-only" + r"the arguments 'self' and 'level' will be keyword-only" ) with tm.assert_produces_warning(FutureWarning, match=msg): - df.reset_index("a") + result = df.reset_index("a", False) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 00bb667d6507a..3d02e784d83b0 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -291,7 +291,7 @@ def test_grouper_column_and_index(self): # Grouping a single-index frame by a column and the index should # be equivalent to resetting the index and grouping by two columns - df_single = df_multi.reset_index(level="outer") + df_single = df_multi.reset_index("outer") result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() expected = df_single.reset_index().groupby(["B", "inner"]).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py index 5602a7445d9d6..c3e0a92850c07 100644 --- a/pandas/tests/reshape/merge/test_merge_index_as_string.py +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -96,11 +96,11 @@ def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, ho # Drop index levels that aren't involved in the merge drop_left = [n for n in left_levels if n not in left_on] if drop_left: - df_left = df_left.reset_index(level=drop_left, drop=True) + df_left = df_left.reset_index(drop_left, drop=True) drop_right = [n 
for n in right_levels if n not in right_on] if drop_right: - df_right = df_right.reset_index(level=drop_right, drop=True) + df_right = df_right.reset_index(drop_right, drop=True) # Convert remaining index levels to columns reset_left = [n for n in left_levels if n in left_on] diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index ccc454c9b11be..8904849f4930f 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -107,7 +107,7 @@ def test_reset_index_level(self): s.reset_index(level=[0, 1, 2]) # Check that .reset_index([],drop=True) doesn't fail - result = Series(range(4)).reset_index(level=[], drop=True) + result = Series(range(4)).reset_index([], drop=True) expected = Series(range(4)) tm.assert_series_equal(result, expected) @@ -127,14 +127,14 @@ def test_reset_index_drop_errors(self): # KeyError raised for series index when passed level name is missing s = Series(range(4)) with pytest.raises(KeyError, match="does not match index name"): - s.reset_index(level="wrong", drop=True) + s.reset_index("wrong", drop=True) with pytest.raises(KeyError, match="does not match index name"): - s.reset_index(level="wrong") + s.reset_index("wrong") # KeyError raised for series when level to be dropped is missing s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) with pytest.raises(KeyError, match="not found"): - s.reset_index(level="wrong", drop=True) + s.reset_index("wrong", drop=True) def test_reset_index_with_drop(self, series_with_multilevel_index): ser = series_with_multilevel_index @@ -148,6 +148,18 @@ def test_reset_index_with_drop(self, series_with_multilevel_index): assert isinstance(deleveled, Series) assert deleveled.index.name == ser.index.name + def test_drop_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + ser = Series([1, 2, 3], index=Index([1, 2, 3], name="a")) + msg = ( + r"Starting with 
Pandas version 2\.0 all arguments of reset_index except " + r"for the arguments 'self' and 'level' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.reset_index("a", False) + expected = DataFrame({"a": [1, 2, 3], 0: [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "array, dtype",