BUG: group.apply with non-lexsorted levels and sort=True · Issue #14776 · pandas-dev/pandas (original) (raw)
In [1]: df = pd.DataFrame({'x': ['a', 'a', 'b', 'a'], 'y': [1, 1, 2, 2], 'z': [1, 2,3, 4]}).set_index(['x', 'y'])
In [2]: df
Out[2]:
     z
x y
a 1  1
  1  2
b 2  3
a 2  4
In [3]: df.index
Out[3]:
MultiIndex(levels=[['a', 'b'], [1, 2]],
           labels=[[0, 0, 1, 0], [0, 0, 1, 1]],
           names=['x', 'y'])
In [5]: df.index.is_lexsorted()
Out[5]: False
In [6]: df.groupby(level=[0,1]).sum()
Out[6]:
     z
x y
a 1  3
  2  4
b 2  3
In [7]: df.groupby(level=[0,1]).apply(pd.DataFrame.drop_duplicates)
Exception: cannot handle a non-unique multi-index!
In [8]: df.sort_index().groupby(level=[0,1]).apply(pd.DataFrame.drop_duplicates)
Out[8]:
     z
x y
a 1  1
  1  2
  2  4
b 2  3
# this is ok though
In [9]: df.groupby(level=[0,1], sort=False).apply(pd.DataFrame.drop_duplicates)
Out[9]:
     z
x y
a 1  1
  1  2
b 2  3
a 2  4