Accept multiple lambda in groupby list · Issue #26430 · pandas-dev/pandas (original) (raw)

We currently don't allow duplicate function names in the list passed to .groupby().agg({'col': [aggfuncs]}). This is painful with multiple lambdas, which all have the name <lambda>.


SpecificationError Traceback (most recent call last) ~/sandbox/pandas/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs) 483 try: --> 484 result = _agg(arg, _agg_1dim) 485 except SpecificationError:

~/sandbox/pandas/pandas/core/base.py in _agg(arg, func) 434 for fname, agg_how in arg.items(): --> 435 result[fname] = func(fname, agg_how) 436 return result

~/sandbox/pandas/pandas/core/base.py in _agg_1dim(name, how, subset) 417 "in aggregation") --> 418 return colg.aggregate(how, _level=(_level or 0) + 1) 419

~/sandbox/pandas/pandas/core/groupby/generic.py in aggregate(self, func_or_funcs, *args, **kwargs) 771 ret = self._aggregate_multiple_funcs(func_or_funcs, --> 772 (_level or 0) + 1) 773 else:

~/sandbox/pandas/pandas/core/groupby/generic.py in _aggregate_multiple_funcs(self, arg, _level) 834 'Function names must be unique, found multiple named ' --> 835 '{}'.format(name)) 836

SpecificationError: Function names must be unique, found multiple named <lambda>

During handling of the above exception, another exception occurred:

SpecificationError Traceback (most recent call last) in ----> 1 df.groupby("A").agg({'B': [lambda x: 0, lambda x: 1]})

~/sandbox/pandas/pandas/core/groupby/generic.py in aggregate(self, arg, *args, **kwargs) 1344 @Appender(_shared_docs['aggregate']) 1345 def aggregate(self, arg=None, *args, **kwargs): -> 1346 return super().aggregate(arg, *args, **kwargs) 1347 1348 agg = aggregate

~/sandbox/pandas/pandas/core/groupby/generic.py in aggregate(self, func, *args, **kwargs) 174 "'(column, aggfunc).") 175 --> 176 result, how = self._aggregate(func, _level=_level, *args, **kwargs) 177 if how is None: 178 return result

~/sandbox/pandas/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs) 487 # we are aggregating expecting all 1d-returns 488 # but we have 2d --> 489 result = _agg(arg, _agg_2dim) 490 491 # combine results

~/sandbox/pandas/pandas/core/base.py in _agg(arg, func) 433 result = OrderedDict() 434 for fname, agg_how in arg.items(): --> 435 result[fname] = func(fname, agg_how) 436 return result 437

~/sandbox/pandas/pandas/core/base.py in _agg_2dim(name, how) 424 colg = self._gotitem(self._selection, ndim=2, 425 subset=obj) --> 426 return colg.aggregate(how, _level=None) 427 428 def _agg(arg, func):

~/sandbox/pandas/pandas/core/groupby/generic.py in aggregate(self, arg, *args, **kwargs) 1344 @Appender(_shared_docs['aggregate']) 1345 def aggregate(self, arg=None, *args, **kwargs): -> 1346 return super().aggregate(arg, *args, **kwargs) 1347 1348 agg = aggregate

~/sandbox/pandas/pandas/core/groupby/generic.py in aggregate(self, func, *args, **kwargs) 174 "'(column, aggfunc).") 175 --> 176 result, how = self._aggregate(func, _level=_level, *args, **kwargs) 177 if how is None: 178 return result

~/sandbox/pandas/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs) 542 return self._aggregate_multiple_funcs(arg, 543 _level=_level, --> 544 _axis=_axis), None 545 else: 546 result = None

~/sandbox/pandas/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _level, _axis) 588 colg = self._gotitem(col, ndim=1, 589 subset=obj.iloc[:, index]) --> 590 results.append(colg.aggregate(arg)) 591 keys.append(col) 592 except (TypeError, DataError):

~/sandbox/pandas/pandas/core/groupby/generic.py in aggregate(self, func_or_funcs, *args, **kwargs) 770 # but not the class list / tuple itself. 771 ret = self._aggregate_multiple_funcs(func_or_funcs, --> 772 (_level or 0) + 1) 773 else: 774 cyfunc = self._is_cython_func(func_or_funcs)

~/sandbox/pandas/pandas/core/groupby/generic.py in _aggregate_multiple_funcs(self, arg, _level) 833 raise SpecificationError( 834 'Function names must be unique, found multiple named ' --> 835 '{}'.format(name)) 836 837 # reset the cache so that we

SpecificationError: Function names must be unique, found multiple named <lambda>

That adds a suffix 1, 2, ... to all subsequent lambdas in the same MI (MultiIndex) level. It doesn't change the first. Do we want <lambda 0> for the first?

As a side effect, this enables multiple lambdas per column with the new keyword aggregation.

I have a WIP started. Will do for 0.25.