Skip to content

Commit ce123cd

Browse files
authored
REGR: NumPy func warning when dropping nuisance in agg, apply, transform (#50627)
REGR: Warnings for NumPy funcs when dropping nuisance in agg, apply, transform
1 parent 54b4037 commit ce123cd

File tree

10 files changed

+163
-13
lines changed

10 files changed

+163
-13
lines changed

doc/source/whatsnew/v1.5.3.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`)
2020
- Fixed regression in :meth:`SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`)
2121
- Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`)
22+
- Fixed regression in the methods ``apply``, ``agg``, and ``transform`` when used with NumPy functions, where the warning emitted on failure with non-numeric dtypes told users to supply ``numeric_only=True``; such columns must instead be dropped prior to using these methods (:issue:`50538`)
2223
-
2324

2425
.. ---------------------------------------------------------------------------

pandas/core/apply.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@
3939
SpecificationError,
4040
)
4141
from pandas.util._decorators import cache_readonly
42-
from pandas.util._exceptions import find_stack_level
42+
from pandas.util._exceptions import (
43+
find_stack_level,
44+
rewrite_warning,
45+
)
4346

4447
from pandas.core.dtypes.cast import is_nested_object
4548
from pandas.core.dtypes.common import (
@@ -174,7 +177,15 @@ def agg(self) -> DataFrame | Series | None:
174177
if callable(arg):
175178
f = com.get_cython_func(arg)
176179
if f and not args and not kwargs:
177-
return getattr(obj, f)()
180+
# GH#50538
181+
old_msg = "The default value of numeric_only"
182+
new_msg = (
183+
f"The operation {arg} failed on a column. If any error is "
184+
f"raised, this will raise an exception in a future version "
185+
f"of pandas. Drop these columns to avoid this warning."
186+
)
187+
with rewrite_warning(old_msg, FutureWarning, new_msg):
188+
return getattr(obj, f)()
178189

179190
# caller can react
180191
return None
@@ -309,7 +320,14 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
309320
if not args and not kwargs:
310321
f = com.get_cython_func(func)
311322
if f:
312-
return getattr(obj, f)()
323+
old_msg = "The default value of numeric_only"
324+
new_msg = (
325+
f"The operation {func} failed on a column. If any error is "
326+
f"raised, this will raise an exception in a future version "
327+
f"of pandas. Drop these columns to avoid this warning."
328+
)
329+
with rewrite_warning(old_msg, FutureWarning, new_msg):
330+
return getattr(obj, f)()
313331

314332
# Two possible ways to use a UDF - apply or call directly
315333
try:

pandas/core/groupby/groupby.py

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ class providing the base-class of operations.
88
"""
99
from __future__ import annotations
1010

11-
from contextlib import contextmanager
11+
from contextlib import (
12+
contextmanager,
13+
nullcontext,
14+
)
1215
import datetime
1316
from functools import (
1417
partial,
@@ -64,7 +67,10 @@ class providing the base-class of operations.
6467
cache_readonly,
6568
doc,
6669
)
67-
from pandas.util._exceptions import find_stack_level
70+
from pandas.util._exceptions import (
71+
find_stack_level,
72+
rewrite_warning,
73+
)
6874

6975
from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
7076
from pandas.core.dtypes.common import (
@@ -1508,7 +1514,9 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)
15081514
)
15091515
)
15101516
def apply(self, func, *args, **kwargs) -> NDFrameT:
1511-
1517+
# GH#50538
1518+
is_np_func = func in com._cython_table and func not in com._builtin_table
1519+
orig_func = func
15121520
func = com.is_builtin_func(func)
15131521

15141522
if isinstance(func, str):
@@ -1546,7 +1554,17 @@ def f(g):
15461554
# ignore SettingWithCopy here in case the user mutates
15471555
with option_context("mode.chained_assignment", None):
15481556
try:
1549-
result = self._python_apply_general(f, self._selected_obj)
1557+
# GH#50538
1558+
old_msg = "The default value of numeric_only"
1559+
new_msg = (
1560+
f"The operation {orig_func} failed on a column. If any error is "
1561+
f"raised, this will raise an exception in a future version "
1562+
f"of pandas. Drop these columns to avoid this warning."
1563+
)
1564+
with rewrite_warning(
1565+
old_msg, FutureWarning, new_msg
1566+
) if is_np_func else nullcontext():
1567+
result = self._python_apply_general(f, self._selected_obj)
15501568
except TypeError:
15511569
# gh-20949
15521570
# try again, with .apply acting as a filtering
@@ -1557,7 +1575,17 @@ def f(g):
15571575
# on a string grouper column
15581576

15591577
with self._group_selection_context():
1560-
return self._python_apply_general(f, self._selected_obj)
1578+
# GH#50538
1579+
old_msg = "The default value of numeric_only"
1580+
new_msg = (
1581+
f"The operation {orig_func} failed on a column. If any error "
1582+
f"is raised, this will raise an exception in a future version "
1583+
f"of pandas. Drop these columns to avoid this warning."
1584+
)
1585+
with rewrite_warning(
1586+
old_msg, FutureWarning, new_msg
1587+
) if is_np_func else nullcontext():
1588+
return self._python_apply_general(f, self._selected_obj)
15611589

15621590
return result
15631591

pandas/tests/apply/test_frame_apply.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,27 @@ def test_nuiscance_columns():
12871287
tm.assert_frame_equal(result, expected)
12881288

12891289

1290+
@pytest.mark.parametrize("method", ["agg", "apply", "transform"])
1291+
def test_numeric_only_warning_numpy(method):
1292+
# GH#50538
1293+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz")})
1294+
if method == "agg":
1295+
msg = "The operation <function mean.*failed"
1296+
with tm.assert_produces_warning(FutureWarning, match=msg):
1297+
getattr(df, method)(np.mean)
1298+
# Ensure users can't pass numeric_only
1299+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1300+
getattr(df, method)(np.mean, numeric_only=True)
1301+
elif method == "apply":
1302+
with pytest.raises(TypeError, match="Could not convert"):
1303+
getattr(df, method)(np.mean)
1304+
else:
1305+
with pytest.raises(ValueError, match="Function did not transform"):
1306+
msg = "The operation <function mean.*failed"
1307+
with tm.assert_produces_warning(FutureWarning, match=msg):
1308+
getattr(df, method)(np.mean)
1309+
1310+
12901311
@pytest.mark.parametrize("how", ["agg", "apply"])
12911312
def test_non_callable_aggregates(how):
12921313

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,3 +1454,15 @@ def test_agg_of_mode_list(test, constant):
14541454
expected = expected.set_index(0)
14551455

14561456
tm.assert_frame_equal(result, expected)
1457+
1458+
1459+
def test_numeric_only_warning_numpy():
1460+
# GH#50538
1461+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
1462+
gb = df.groupby("a")
1463+
msg = "The operation <function mean.*failed"
1464+
with tm.assert_produces_warning(FutureWarning, match=msg):
1465+
gb.agg(np.mean)
1466+
# Ensure users can't pass numeric_only
1467+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1468+
gb.agg(np.mean, numeric_only=True)

pandas/tests/groupby/test_apply.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,3 +1357,16 @@ def test_empty_df(method, op):
13571357
)
13581358

13591359
tm.assert_series_equal(result, expected)
1360+
1361+
1362+
def test_numeric_only_warning_numpy():
1363+
# GH#50538
1364+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
1365+
gb = df.groupby("a")
1366+
msg = "The operation <function mean.*failed"
1367+
# Warning is raised from within NumPy
1368+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
1369+
gb.apply(np.mean)
1370+
# Ensure users can't pass numeric_only
1371+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1372+
gb.apply(np.mean, numeric_only=True)

pandas/tests/groupby/test_groupby.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -486,9 +486,14 @@ def test_frame_set_name_single(df):
486486
result = df.groupby("A", as_index=False).mean()
487487
assert result.index.name != "A"
488488

489+
# GH#50538
490+
msg = "The operation <function mean.*failed"
489491
with tm.assert_produces_warning(FutureWarning, match=msg):
490492
result = grouped.agg(np.mean)
491493
assert result.index.name == "A"
494+
# Ensure users can't pass numeric_only
495+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
496+
grouped.agg(np.mean, numeric_only=True)
492497

493498
result = grouped.agg({"C": np.mean, "D": np.std})
494499
assert result.index.name == "A"
@@ -766,19 +771,24 @@ def test_as_index_series_return_frame(df):
766771
grouped = df.groupby("A", as_index=False)
767772
grouped2 = df.groupby(["A", "B"], as_index=False)
768773

769-
msg = "The default value of numeric_only"
774+
# GH#50538
775+
msg = "The operation <function sum.*failed"
770776
with tm.assert_produces_warning(FutureWarning, match=msg):
771777
result = grouped["C"].agg(np.sum)
772778
expected = grouped.agg(np.sum).loc[:, ["A", "C"]]
773779
assert isinstance(result, DataFrame)
774780
tm.assert_frame_equal(result, expected)
781+
# Ensure users can't pass numeric_only
782+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
783+
grouped.agg(np.mean, numeric_only=True)
775784

776785
result2 = grouped2["C"].agg(np.sum)
777786
expected2 = grouped2.agg(np.sum).loc[:, ["A", "B", "C"]]
778787
assert isinstance(result2, DataFrame)
779788
tm.assert_frame_equal(result2, expected2)
780789

781790
result = grouped["C"].sum()
791+
msg = "The default value of numeric_only"
782792
with tm.assert_produces_warning(FutureWarning, match=msg):
783793
expected = grouped.sum().loc[:, ["A", "C"]]
784794
assert isinstance(result, DataFrame)
@@ -1021,10 +1031,14 @@ def test_wrap_aggregated_output_multindex(mframe):
10211031
df["baz", "two"] = "peekaboo"
10221032

10231033
keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
1024-
msg = "The default value of numeric_only"
1034+
# GH#50538
1035+
msg = "The operation <function mean.*failed"
10251036
with tm.assert_produces_warning(FutureWarning, match=msg):
10261037
agged = df.groupby(keys).agg(np.mean)
10271038
assert isinstance(agged.columns, MultiIndex)
1039+
# Ensure users can't pass numeric_only
1040+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1041+
df.groupby(keys).agg(np.mean, numeric_only=True)
10281042

10291043
def aggfun(ser):
10301044
if ser.name == ("foo", "one"):

pandas/tests/groupby/transform/test_transform.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,3 +1563,18 @@ def test_as_index_no_change(keys, df, groupby_func):
15631563
result = gb_as_index_true.transform(groupby_func, *args)
15641564
expected = gb_as_index_false.transform(groupby_func, *args)
15651565
tm.assert_equal(result, expected)
1566+
1567+
1568+
@pytest.mark.parametrize("func", [np.mean, np.cumprod])
1569+
def test_numeric_only_warning_numpy(func):
1570+
# GH#50538
1571+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
1572+
gb = df.groupby("a")
1573+
msg = "The default value of numeric_only"
1574+
with tm.assert_produces_warning(FutureWarning, match=msg):
1575+
gb.transform(func)
1576+
# Ensure users can pass numeric_only
1577+
result = gb.transform(func, numeric_only=True)
1578+
values = [3.5, 3.5, 5.0] if func == np.mean else [3, 12, 5]
1579+
expected = DataFrame({"c": values})
1580+
tm.assert_frame_equal(result, expected)

pandas/tests/resample/test_resample_api.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,3 +938,24 @@ def test_series_downsample_method(method, numeric_only, expected_data):
938938
result = func(numeric_only=numeric_only)
939939
expected = Series(expected_data, index=expected_index)
940940
tm.assert_series_equal(result, expected)
941+
942+
943+
@pytest.mark.parametrize("method", ["agg", "apply", "transform"])
944+
def test_numeric_only_warning_numpy(method):
945+
# GH#50538
946+
resampled = _test_frame.assign(D="x").resample("H")
947+
if method == "transform":
948+
msg = "The default value of numeric_only"
949+
with tm.assert_produces_warning(FutureWarning, match=msg):
950+
getattr(resampled, method)(np.mean)
951+
# Ensure users can pass numeric_only
952+
result = getattr(resampled, method)(np.mean, numeric_only=True)
953+
expected = resampled.transform("mean", numeric_only=True)
954+
tm.assert_frame_equal(result, expected)
955+
else:
956+
msg = "The operation <function mean.*failed"
957+
with tm.assert_produces_warning(FutureWarning, match=msg):
958+
getattr(resampled, method)(np.mean)
959+
# Ensure users can't pass numeric_only
960+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
961+
getattr(resampled, method)(np.mean, numeric_only=True)

pandas/tests/reshape/test_pivot.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ def test_pivot_table_nocols(self):
146146
df = DataFrame(
147147
{"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
148148
)
149-
msg = "pivot_table dropped a column because it failed to aggregate"
149+
# GH#50538
150+
msg = "The operation <function sum.*failed"
150151
with tm.assert_produces_warning(FutureWarning, match=msg):
151152
rs = df.pivot_table(columns="cols", aggfunc=np.sum)
152153
xp = df.pivot_table(index="cols", aggfunc=np.sum).T
@@ -907,7 +908,8 @@ def test_no_col(self):
907908

908909
# to help with a buglet
909910
self.data.columns = [k * 2 for k in self.data.columns]
910-
msg = "pivot_table dropped a column because it failed to aggregate"
911+
# GH#50538
912+
msg = "The operation <function mean.*failed"
911913
with tm.assert_produces_warning(FutureWarning, match=msg):
912914
table = self.data.pivot_table(
913915
index=["AA", "BB"], margins=True, aggfunc=np.mean
@@ -916,6 +918,7 @@ def test_no_col(self):
916918
totals = table.loc[("All", ""), value_col]
917919
assert totals == self.data[value_col].mean()
918920

921+
msg = "pivot_table dropped a column because it failed to aggregate"
919922
with tm.assert_produces_warning(FutureWarning, match=msg):
920923
table = self.data.pivot_table(
921924
index=["AA", "BB"], margins=True, aggfunc="mean"
@@ -975,7 +978,11 @@ def test_margin_with_only_columns_defined(
975978
}
976979
)
977980

978-
msg = "pivot_table dropped a column because it failed to aggregate"
981+
if aggfunc == "sum":
982+
msg = "pivot_table dropped a column because it failed to aggregate"
983+
else:
984+
# GH#50538
985+
msg = "The operation <function mean.*failed"
979986
with tm.assert_produces_warning(FutureWarning, match=msg):
980987
result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
981988
expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy