BUG-24212 fix regression in #24897 by JustinZhengBC · Pull Request #24916 · pandas-dev/pandas (original) (raw)

I changed this from 0.24.1 to 0.25.0.

One specific case I found that seems to be broken by this change is using a categorical as the merge key:

In [15]: left = pd.DataFrame({'a': [1, 2, 3], 'key': pd.Categorical(['a', 'a', 'b'], categories=['a', 'b', 'c'])}) 
    ...: right = pd.DataFrame({'b': [1, 2, 3]}, index=pd.Categorical(['a', 'b', 'c']))                                                                                                                              

In [16]: left.merge(right,  left_on='key', right_index=True, how='right')                                                                                                                                           
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-16-e08f8fc28c75> in <module>
----> 1 left.merge(right,  left_on='key', right_index=True, how='right')

~/scipy/pandas/pandas/core/frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
   6875                      right_on=right_on, left_index=left_index,
   6876                      right_index=right_index, sort=sort, suffixes=suffixes,
-> 6877                      copy=copy, indicator=indicator, validate=validate)
   6878 
   6879     def round(self, decimals=0, *args, **kwargs):

~/scipy/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     46                          copy=copy, indicator=indicator,
     47                          validate=validate)
---> 48     return op.get_result()
     49 
     50 

~/scipy/pandas/pandas/core/reshape/merge.py in get_result(self)
    544                 self.left, self.right)
    545 
--> 546         join_index, left_indexer, right_indexer = self._get_join_info()
    547 
    548         ldata, rdata = self.left._data, self.right._data

~/scipy/pandas/pandas/core/reshape/merge.py in _get_join_info(self)
    762                     join_index = self.right.index.take(right_indexer)
    763                     left_indexer = np.array([-1] * len(join_index))
--> 764             elif self.left_index:
    765                 if len(self.right) > 0:
    766                     join_index = self.right.index.take(right_indexer)

~/scipy/pandas/pandas/core/reshape/merge.py in _create_join_index(self, index, other_index, indexer, other_indexer, how)
    811 
    812         # ugh, spaghetti re #733
--> 813         if _any(self.left_on) and _any(self.right_on):
    814             for lk, rk in zip(self.left_on, self.right_on):
    815                 if is_lkey(lk):

ValueError: invalid literal for int() with base 10: 'c'

The above now fails on master, but works on 0.23.4 and 0.24.0.