diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6bb972c21d927..1d02b8128ef50 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -235,6 +235,8 @@ Deprecations - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`) - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`) - Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`) +- Deprecated making functions given to :meth:`Series.agg` attempt to operate on each element in the :class:`Series` and only operate on the whole :class:`Series` if the elementwise operations fail. In the future, functions given to :meth:`Series.agg` will always operate on the whole :class:`Series`. To keep the current behavior, use :meth:`Series.transform` instead. (:issue:`53325`) +- Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations fail. To keep the current behavior, use :meth:`DataFrame.transform` instead. 
(:issue:`53325`) - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`) - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`) - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 007dd2bb2a89d..1b2aa1d053240 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1121,23 +1121,25 @@ def apply(self) -> DataFrame | Series: def agg(self): result = super().agg() if result is None: + obj = self.obj func = self.func - # string, list-like, and dict-like are entirely handled in super assert callable(func) - # try a regular apply, this evaluates lambdas - # row-by-row; however if the lambda is expected a Series - # expression, e.g.: lambda x: x-x.quantile(0.25) - # this will fail, so we can try a vectorized evaluation - - # we cannot FIRST try the vectorized evaluation, because - # then .agg and .apply would have different semantics if the - # operation is actually defined on the Series, e.g. str + # GH53325: The setup below is just to keep current behavior while emitting a + # deprecation message. In the future this will all be replaced with a simple + # `result = func(self.obj, *self.args, **self.kwargs)`. try: - result = self.obj.apply(func, args=self.args, **self.kwargs) + result = obj.apply(func, args=self.args, **self.kwargs) except (ValueError, AttributeError, TypeError): - result = func(self.obj, *self.args, **self.kwargs) + result = func(obj, *self.args, **self.kwargs) + else: + msg = ( + f"using {func} in {type(obj).__name__}.agg cannot aggregate and " + f"has been deprecated. Use {type(obj).__name__}.transform to " + f"keep behavior unchanged." 
+ ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) return result diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index fc8b57d26a5be..99fc393ff82c5 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1478,8 +1478,8 @@ def test_any_apply_keyword_non_zero_axis_regression(): tm.assert_series_equal(result, expected) -def test_agg_list_like_func_with_args(): - # GH 50624 +def test_agg_mapping_func_deprecated(): + # GH 53325 df = DataFrame({"x": [1, 2, 3]}) def foo1(x, a=1, c=0): @@ -1488,17 +1488,26 @@ def foo1(x, a=1, c=0): def foo2(x, b=2, c=0): return x + b + c - msg = r"foo1\(\) got an unexpected keyword argument 'b'" - with pytest.raises(TypeError, match=msg): - df.agg([foo1, foo2], 0, 3, b=3, c=4) + # single func already takes the vectorized path + result = df.agg(foo1, 0, 3, c=4) + expected = df + 7 + tm.assert_frame_equal(result, expected) + + msg = "using .+ in Series.agg cannot aggregate and" - result = df.agg([foo1, foo2], 0, 3, c=4) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.agg([foo1, foo2], 0, 3, c=4) expected = DataFrame( - [[8, 8], [9, 9], [10, 10]], - columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), + [[8, 8], [9, 9], [10, 10]], columns=[["x", "x"], ["foo1", "foo2"]] ) tm.assert_frame_equal(result, expected) + # TODO: the result below is wrong, should be fixed (GH53325) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.agg({"x": foo1}, 0, 3, c=4) + expected = DataFrame([2, 3, 4], columns=["x"]) + tm.assert_frame_equal(result, expected) + def test_agg_std(): df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"]) diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 8e385de0b48e0..2d57515882aed 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -66,6 +66,28 
@@ def test_transform_empty_listlike(float_frame, ops, frame_or_series): obj.transform(ops) +def test_transform_listlike_func_with_args(): + # GH 50624 + df = DataFrame({"x": [1, 2, 3]}) + + def foo1(x, a=1, c=0): + return x + a + c + + def foo2(x, b=2, c=0): + return x + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + df.transform([foo1, foo2], 0, 3, b=3, c=4) + + result = df.transform([foo1, foo2], 0, 3, c=4) + expected = DataFrame( + [[8, 8], [9, 9], [10, 10]], + columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [dict, Series]) def test_transform_dictlike(axis, float_frame, box): # GH 35964 diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index d75b784302676..21b5c803d0e76 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -8,6 +8,7 @@ from itertools import chain import re +import warnings import numpy as np import pytest @@ -307,7 +308,10 @@ def test_transform_and_agg_err_series(string_series, func, msg): # we are trying to transform with an aggregator with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): - string_series.agg(func) + # GH53325 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + string_series.agg(func) @pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]]) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 985cb5aa5b09c..425d2fb42a711 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -108,14 +108,18 @@ def f(x, a=0, b=0, c=0): return x + a + 10 * b + 100 * c s = Series([1, 2]) - result = s.agg(f, 0, *args, **kwargs) + msg = ( + "in Series.agg cannot aggregate and has been deprecated. 
" + "Use Series.transform to keep behavior unchanged." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.agg(f, 0, *args, **kwargs) expected = s + increment tm.assert_series_equal(result, expected) -def test_agg_list_like_func_with_args(): - # GH 50624 - +def test_agg_mapping_func_deprecated(): + # GH 53325 s = Series([1, 2, 3]) def foo1(x, a=1, c=0): @@ -124,13 +128,13 @@ def foo1(x, a=1, c=0): def foo2(x, b=2, c=0): return x + b + c - msg = r"foo1\(\) got an unexpected keyword argument 'b'" - with pytest.raises(TypeError, match=msg): - s.agg([foo1, foo2], 0, 3, b=3, c=4) - - result = s.agg([foo1, foo2], 0, 3, c=4) - expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]}) - tm.assert_frame_equal(result, expected) + msg = "using .+ in Series.agg cannot aggregate and" + with tm.assert_produces_warning(FutureWarning, match=msg): + s.agg(foo1, 0, 3, c=4) + with tm.assert_produces_warning(FutureWarning, match=msg): + s.agg([foo1, foo2], 0, 3, c=4) + with tm.assert_produces_warning(FutureWarning, match=msg): + s.agg({"a": foo1, "b": foo2}, 0, 3, c=4) def test_series_apply_map_box_timestamps(by_row): @@ -391,23 +395,32 @@ def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): assert result == str(string_series) -def test_with_nested_series(datetime_series): +def test_agg_evaluate_lambdas(string_series): + # GH53325 + # in the future, the result will be a Series class. 
+ + with tm.assert_produces_warning(FutureWarning): + result = string_series.agg(lambda x: type(x)) + assert isinstance(result, Series) and len(result) == len(string_series) + + with tm.assert_produces_warning(FutureWarning): + result = string_series.agg(type) + assert isinstance(result, Series) and len(result) == len(string_series) + + +@pytest.mark.parametrize("op_name", ["agg", "apply"]) +def test_with_nested_series(datetime_series, op_name): # GH 2316 # .agg with a reducer and a transform, what to do msg = "Returning a DataFrame from Series.apply when the supplied function" with tm.assert_produces_warning(FutureWarning, match=msg): # GH52123 - result = datetime_series.apply( + result = getattr(datetime_series, op_name)( lambda x: Series([x, x**2], index=["x", "x^2"]) ) expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) - tm.assert_frame_equal(result, expected) - def test_replicate_describe(string_series, by_row): # this also tests a result set that is all scalars diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py index b10af13eae20c..82592c4711ece 100644 --- a/pandas/tests/apply/test_series_transform.py +++ b/pandas/tests/apply/test_series_transform.py @@ -10,6 +10,21 @@ import pandas._testing as tm +@pytest.mark.parametrize( + "args, kwargs, increment", + [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], +) +def test_agg_args(args, kwargs, increment): + # GH 43357 + def f(x, a=0, b=0, c=0): + return x + a + 10 * b + 100 * c + + s = Series([1, 2]) + result = s.transform(f, 0, *args, **kwargs) + expected = s + increment + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( "ops, names", [ @@ -28,6 +43,26 @@ def test_transform_listlike(string_series, ops, 
names): tm.assert_frame_equal(result, expected) +def test_transform_listlike_func_with_args(): + # GH 50624 + + s = Series([1, 2, 3]) + + def foo1(x, a=1, c=0): + return x + a + c + + def foo2(x, b=2, c=0): + return x + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + s.transform([foo1, foo2], 0, 3, b=3, c=4) + + result = s.transform([foo1, foo2], 0, 3, c=4) + expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [dict, Series]) def test_transform_dictlike(string_series, box): # GH 35964