Merge on CategoricalIndex fails if left_index=True & right_index=True, but not if on={index} · Issue #28189 · pandas-dev/pandas (original) (raw)
Code Sample, a copy-pastable example if possible
```python
import pandas as pd
import numpy as np

pdf = pd.DataFrame({
    "idx": pd.Categorical(["1"] * 4),
    "value": [1, 2, 3, 4]
})
pdf = pdf.set_index("idx")
pdf
```
| idx | value |
|---|---|
| 1 | 1 |
| 1 | 2 |
| 1 | 3 |
| 1 | 4 |
```python
agg = pdf.groupby("idx").agg(np.sum)["value"]
agg
```
idx
1 10
Name: value, dtype: int64
```python
merged = pd.merge(pdf, agg, how="left", left_index=True, right_index=True)
merged
```
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-89-5347bee83336> in <module>
----> 1 merged = pd.merge(pdf, agg, how="left", left_index=True, right_index=True)
2 merged
/usr/local/lib/python3.7/site-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
46 copy=copy, indicator=indicator,
47 validate=validate)
---> 48 return op.get_result()
49
50
/usr/local/lib/python3.7/site-packages/pandas/core/reshape/merge.py in get_result(self)
544 self.left, self.right)
545
--> 546 join_index, left_indexer, right_indexer = self._get_join_info()
547
548 ldata, rdata = self.left._data, self.right._data
/usr/local/lib/python3.7/site-packages/pandas/core/reshape/merge.py in _get_join_info(self)
742 join_index, left_indexer, right_indexer = \
743 left_ax.join(right_ax, how=self.how, return_indexers=True,
--> 744 sort=self.sort)
745 elif self.right_index and self.how == 'left':
746 join_index, left_indexer, right_indexer = \
/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in join(self, other, how, level, return_indexers, sort)
3291 if self.is_monotonic and other.is_monotonic:
3292 return self._join_monotonic(other, how=how,
-> 3293 return_indexers=return_indexers)
3294 else:
3295 return self._join_non_unique(other, how=how,
/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _join_monotonic(self, other, how, return_indexers)
3583 else:
3584 if how == 'left':
-> 3585 join_index, lidx, ridx = self._left_indexer(sv, ov)
3586 elif how == 'right':
3587 join_index, ridx, lidx = self._left_indexer(ov, sv)
/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _left_indexer(self, left, right)
217
218 def _left_indexer(self, left, right):
--> 219 return libjoin.left_join_indexer(left, right)
220
221 def _inner_indexer(self, left, right):
pandas/_libs/join.pyx in pandas._libs.join.__pyx_fused_cpdef()
TypeError: No matching signature found
Problem description
The problem is triggered in `libjoin.left_join_indexer(left, right)`, where both `left` and `right` have `dtype(int8)`, which raises `TypeError: No matching signature found`.
Interestingly enough, if you change from
pd.merge(pdf, agg, how="left", left_index=True, right_index=True)
to
pd.merge(pdf, agg, how="left", on="idx")
everything works fine as demonstrated below.
Expected Output
```python
merged = pd.merge(pdf, agg, how="left", on="idx")
merged
```
| idx | value_x | value_y |
|---|---|---|
| 1 | 1 | 10 |
| 1 | 2 | 10 |
| 1 | 3 | 10 |
| 1 | 4 | 10 |
Output of pd.show_versions()
INSTALLED VERSIONS
commit : None
pandas : 0.25.1
numpy : 1.16.3
pytz : 2019.1
dateutil : 2.7.5
pip : 19.1.1
setuptools : 41.0.1
Cython : 0.29.13
pytest : None
hypothesis : None
sphinx : 2.2.0
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.10
IPython : 7.5.0
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.1.1
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
s3fs : None
scipy : 1.3.0
sqlalchemy : None
tables : None
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None