groupby.apply fails with ValueError: cannot reindex from a duplicate axis · Issue #30667 · pandas-dev/pandas (original) (raw)
import pandas as pd
df=pd.DataFrame([['x','p'],['x','p'],['x','q']], columns=['X','Y'], index=[1,2,2]) print(df) df=df.groupby(['Y']).apply(lambda x: x)
df=pd.DataFrame([['x','p'],['x','p'],['x','o']], columns=['X','Y'], index=[1,2,2]) print(df) df=df.groupby(['Y']).apply(lambda x: x)
Traceback (most recent call last):
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 725, in apply
result = self._python_apply_general(f)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 745, in _python_apply_general
keys, values, not_indexed_same=mutated or self.mutated
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/generic.py", line 372, in _wrap_applied_output
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 955, in _concat_objects
result = result.reindex(ax, axis=self.axis)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/util/_decorators.py", line 221, in wrapper
return func(*args, **kwargs)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3976, in reindex
return super().reindex(**kwargs)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4514, in reindex
axes, level, limit, tolerance, method, fill_value, copy
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3864, in _reindex_axes
index, method, copy, level, fill_value, limit, tolerance
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3886, in _reindex_index
allow_dups=False,
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4577, in _reindex_with_indexers
copy=copy,
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/internals/managers.py", line 1251, in reindex_indexer
self.axes[axis]._can_reindex(indexer)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3362, in _can_reindex
raise ValueError("cannot reindex from a duplicate axis")
ValueError: cannot reindex from a duplicate axis
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/hm106930/ipv/web/groupby_bug.py", line 16, in <module>
df=df.groupby(['Y']).apply(lambda x: x)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 737, in apply
return self._python_apply_general(f)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 745, in _python_apply_general
keys, values, not_indexed_same=mutated or self.mutated
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/generic.py", line 372, in _wrap_applied_output
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 955, in _concat_objects
result = result.reindex(ax, axis=self.axis)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/util/_decorators.py", line 221, in wrapper
return func(*args, **kwargs)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3976, in reindex
return super().reindex(**kwargs)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4514, in reindex
axes, level, limit, tolerance, method, fill_value, copy
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3864, in _reindex_axes
index, method, copy, level, fill_value, limit, tolerance
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3886, in _reindex_index
allow_dups=False,
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4577, in _reindex_with_indexers
copy=copy,
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/internals/managers.py", line 1251, in reindex_indexer
self.axes[axis]._can_reindex(indexer)
File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3362, in _can_reindex
raise ValueError("cannot reindex from a duplicate axis")
ValueError: cannot reindex from a duplicate axis