BUG: Fix calling groupBy(...).apply(func) on an empty dataframe invok… · pandas-dev/pandas@8b0ad71 (original) (raw)

3 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -77,6 +77,7 @@ Fixed regressions
77 77 - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
78 78 - Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`)
79 79 - Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
80 +- Fixed regression in :meth:`.DataFrameGroupBy.apply` where the user-defined function was invoked even when the dataframe is empty (:issue:`47985`)
80 81
81 82 .. ---------------------------------------------------------------------------
82 83
Original file line number Diff line number Diff line change
@@ -787,15 +787,14 @@ def apply(
787 787 if not mutated and not _is_indexed_like(res, group_axes, axis):
788 788 mutated = True
789 789 result_values.append(res)
790 -
791 790 # getattr pattern for __name__ is needed for functools.partial objects
792 -if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
793 -"idxmin",
794 -"idxmax",
795 -"nanargmin",
796 -"nanargmax",
791 +if len(group_keys) == 0 and getattr(f, "__name__", None) in [
792 +"mad",
793 +"skew",
794 +"sum",
795 +"prod",
797 796 ]:
798 -# If group_keys is empty, then no function calls have been made,
797 +# If group_keys is empty, then no function calls have been made,
799 798 # so we will not have raised even if this is an invalid dtype.
800 799 # So do one dummy call here to raise appropriate TypeError.
801 800 f(data.iloc[:0])
Original file line number Diff line number Diff line change
@@ -1331,3 +1331,28 @@ def test_result_name_when_one_group(name):
1331 1331 expected = Series([1, 2], name=name)
1332 1332
1333 1333 tm.assert_series_equal(result, expected)
1334 +
1335 +
1336 +@pytest.mark.parametrize(
1337 + "method, op",
1338 + [
1339 + ("apply", lambda gb: gb.values[-1]),
1340 + ("apply", lambda gb: gb["b"].iloc[0]),
1341 + ("agg", "mad"),
1342 + ("agg", "skew"),
1343 + ("agg", "prod"),
1344 + ("agg", "sum"),
1345 + ],
1346 +)
1347 +def test_empty_df(method, op):
1348 +# GH 47985
1349 +empty_df = DataFrame({"a": [], "b": []})
1350 +gb = empty_df.groupby("a", group_keys=True)
1351 +group = getattr(gb, "b")
1352 +
1353 +result = getattr(group, method)(op)
1354 +expected = Series(
1355 + [], name="b", dtype="float64", index=Index([], dtype="float64", name="a")
1356 + )
1357 +
1358 +tm.assert_series_equal(result, expected)