groupby.apply fails with ValueError: cannot reindex from a duplicate axis · Issue #30667 · pandas-dev/pandas (original) (raw)

import pandas as pd

# Reproducer, first case: the row index contains a duplicate label (2 appears
# twice) and column 'Y' holds the group keys 'p', 'p', 'q'.
df = pd.DataFrame(
    [['x', 'p'], ['x', 'p'], ['x', 'q']],
    columns=['X', 'Y'],
    index=[1, 2, 2],
)
print(df)

# Identity apply: the lambda returns each group unchanged, so any failure
# comes from how groupby reassembles the pieces, not from user logic.
df = df.groupby(['Y']).apply(lambda x: x)

# Reproducer, second case: same frame shape and the same duplicated index
# label (2 appears twice), but the last group key in 'Y' is 'o' instead of 'q'.
df = pd.DataFrame(
    [['x', 'p'], ['x', 'p'], ['x', 'o']],
    columns=['X', 'Y'],
    index=[1, 2, 2],
)
print(df)

# Identity apply on each group.
# NOTE(review): the traceback in this report shows
# "ValueError: cannot reindex from a duplicate axis" raised from a
# groupby(['Y']).apply(lambda x: x) call; presumably it is this case that
# triggers it on the affected pandas version -- confirm against the issue.
df = df.groupby(['Y']).apply(lambda x: x)

Traceback (most recent call last):
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 725, in apply
    result = self._python_apply_general(f)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 745, in _python_apply_general
    keys, values, not_indexed_same=mutated or self.mutated
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/generic.py", line 372, in _wrap_applied_output
    return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 955, in _concat_objects
    result = result.reindex(ax, axis=self.axis)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/util/_decorators.py", line 221, in wrapper
    return func(*args, **kwargs)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3976, in reindex
    return super().reindex(**kwargs)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4514, in reindex
    axes, level, limit, tolerance, method, fill_value, copy
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3864, in _reindex_axes
    index, method, copy, level, fill_value, limit, tolerance
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3886, in _reindex_index
    allow_dups=False,
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4577, in _reindex_with_indexers
    copy=copy,
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/internals/managers.py", line 1251, in reindex_indexer
    self.axes[axis]._can_reindex(indexer)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3362, in _can_reindex
    raise ValueError("cannot reindex from a duplicate axis")
ValueError: cannot reindex from a duplicate axis

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hm106930/ipv/web/groupby_bug.py", line 16, in <module>
    df=df.groupby(['Y']).apply(lambda x: x)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 737, in apply
    return self._python_apply_general(f)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 745, in _python_apply_general
    keys, values, not_indexed_same=mutated or self.mutated
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/generic.py", line 372, in _wrap_applied_output
    return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 955, in _concat_objects
    result = result.reindex(ax, axis=self.axis)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/util/_decorators.py", line 221, in wrapper
    return func(*args, **kwargs)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3976, in reindex
    return super().reindex(**kwargs)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4514, in reindex
    axes, level, limit, tolerance, method, fill_value, copy
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3864, in _reindex_axes
    index, method, copy, level, fill_value, limit, tolerance
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/frame.py", line 3886, in _reindex_index
    allow_dups=False,
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/generic.py", line 4577, in _reindex_with_indexers
    copy=copy,
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/internals/managers.py", line 1251, in reindex_indexer
    self.axes[axis]._can_reindex(indexer)
  File "/home/hm106930/.local/share/virtualenvs/ipv-work-62kMEXht/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3362, in _can_reindex
    raise ValueError("cannot reindex from a duplicate axis")
ValueError: cannot reindex from a duplicate axis