Merge on CategoricalIndex fails if left_index=True & right_index=True, but not if on={index} · Issue #28189 · pandas-dev/pandas (original) (raw)

Code Sample, a copy-pastable example if possible

import pandas as pd
import numpy as np

pdf = pd.DataFrame({
    "idx": pd.Categorical(["1"] * 4),
    "value": [1, 2, 3, 4]
})
pdf = pdf.set_index("idx")
pdf

     value
idx
1        1
1        2
1        3
1        4

agg = pdf.groupby("idx").agg(np.sum)["value"]
agg

idx
1    10
Name: value, dtype: int64

merged = pd.merge(pdf, agg, how="left", left_index=True, right_index=True)
merged

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-89-5347bee83336> in <module>
----> 1 merged = pd.merge(pdf, agg, how="left", left_index=True, right_index=True)
      2 merged


/usr/local/lib/python3.7/site-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     46                          copy=copy, indicator=indicator,
     47                          validate=validate)
---> 48     return op.get_result()
     49 
     50 


/usr/local/lib/python3.7/site-packages/pandas/core/reshape/merge.py in get_result(self)
    544                 self.left, self.right)
    545 
--> 546         join_index, left_indexer, right_indexer = self._get_join_info()
    547 
    548         ldata, rdata = self.left._data, self.right._data


/usr/local/lib/python3.7/site-packages/pandas/core/reshape/merge.py in _get_join_info(self)
    742             join_index, left_indexer, right_indexer = \
    743                 left_ax.join(right_ax, how=self.how, return_indexers=True,
--> 744                              sort=self.sort)
    745         elif self.right_index and self.how == 'left':
    746             join_index, left_indexer, right_indexer = \


/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in join(self, other, how, level, return_indexers, sort)
   3291             if self.is_monotonic and other.is_monotonic:
   3292                 return self._join_monotonic(other, how=how,
-> 3293                                             return_indexers=return_indexers)
   3294             else:
   3295                 return self._join_non_unique(other, how=how,


/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _join_monotonic(self, other, how, return_indexers)
   3583         else:
   3584             if how == 'left':
-> 3585                 join_index, lidx, ridx = self._left_indexer(sv, ov)
   3586             elif how == 'right':
   3587                 join_index, ridx, lidx = self._left_indexer(ov, sv)


/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _left_indexer(self, left, right)
    217 
    218     def _left_indexer(self, left, right):
--> 219         return libjoin.left_join_indexer(left, right)
    220 
    221     def _inner_indexer(self, left, right):


pandas/_libs/join.pyx in pandas._libs.join.__pyx_fused_cpdef()


TypeError: No matching signature found

Problem description

The error is raised in `libjoin.left_join_indexer(left, right)`: both `left` and `right` have dtype `int8`, and the call fails with `TypeError: No matching signature found`.

Interestingly enough, if you change from

pd.merge(pdf, agg, how="left", left_index=True, right_index=True)

to

pd.merge(pdf, agg, how="left", on="idx")

everything works fine as demonstrated below.

Expected Output

merged = pd.merge(pdf, agg, how="left", on="idx")
merged

     value_x  value_y
idx
1          1       10
1          2       10
1          3       10
1          4       10

Output of pd.show_versions()

INSTALLED VERSIONS

commit : None

pandas : 0.25.1
numpy : 1.16.3
pytz : 2019.1
dateutil : 2.7.5
pip : 19.1.1
setuptools : 41.0.1
Cython : 0.29.13
pytest : None
hypothesis : None
sphinx : 2.2.0
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.10
IPython : 7.5.0
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.1.1
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
s3fs : None
scipy : 1.3.0
sqlalchemy : None
tables : None
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None