BUG: Fix calling groupBy(...).apply(func) on an empty dataframe invok… · pandas-dev/pandas@8b0ad71 (original) (raw)
3 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -77,6 +77,7 @@ Fixed regressions | ||
77 | 77 | - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`) |
78 | 78 | - Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`) |
79 | 79 | - Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`) |
80 | +- Fixed regression in :meth:`.DataFrameGroupBy.apply` when a user-defined function is called on an empty dataframe (:issue:`47985`) | |
80 | 81 | |
81 | 82 | .. --------------------------------------------------------------------------- |
82 | 83 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -787,15 +787,14 @@ def apply( | ||
787 | 787 | if not mutated and not _is_indexed_like(res, group_axes, axis): |
788 | 788 | mutated = True |
789 | 789 | result_values.append(res) |
790 | - | |
791 | 790 | # getattr pattern for __name__ is needed for functools.partial objects |
792 | -if len(group_keys) == 0 and getattr(f, "__name__", None) not in [ | |
793 | -"idxmin", | |
794 | -"idxmax", | |
795 | -"nanargmin", | |
796 | -"nanargmax", | |
791 | +if len(group_keys) == 0 and getattr(f, "__name__", None) in [ | |
792 | +"mad", | |
793 | +"skew", | |
794 | +"sum", | |
795 | +"prod", | |
797 | 796 | ]: |
798 | -# If group_keys is empty, then no function calls have been made, | |
797 | +# If group_keys is empty, then no function calls have been made, | |
799 | 798 | # so we will not have raised even if this is an invalid dtype. |
800 | 799 | # So do one dummy call here to raise appropriate TypeError. |
801 | 800 | f(data.iloc[:0]) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1331,3 +1331,28 @@ def test_result_name_when_one_group(name): | ||
1331 | 1331 | expected = Series([1, 2], name=name) |
1332 | 1332 | |
1333 | 1333 | tm.assert_series_equal(result, expected) |
1334 | + | |
1335 | + | |
1336 | +@pytest.mark.parametrize( | |
1337 | + "method, op", | |
1338 | + [ | |
1339 | + ("apply", lambda gb: gb.values[-1]), | |
1340 | + ("apply", lambda gb: gb["b"].iloc[0]), | |
1341 | + ("agg", "mad"), | |
1342 | + ("agg", "skew"), | |
1343 | + ("agg", "prod"), | |
1344 | + ("agg", "sum"), | |
1345 | + ], | |
1346 | +) | |
1347 | +def test_empty_df(method, op): | |
1348 | +# GH 47985 | |
1349 | +empty_df = DataFrame({"a": [], "b": []}) | |
1350 | +gb = empty_df.groupby("a", group_keys=True) | |
1351 | +group = getattr(gb, "b") | |
1352 | + | |
1353 | +result = getattr(group, method)(op) | |
1354 | +expected = Series( | |
1355 | + [], name="b", dtype="float64", index=Index([], dtype="float64", name="a") | |
1356 | + ) | |
1357 | + | |
1358 | +tm.assert_series_equal(result, expected) |