BUG-24212 fix regression in #24897 by JustinZhengBC · Pull Request #24916 · pandas-dev/pandas (original) (raw)
I changed this from 0.24.1 to 0.25.0.
One specific case I found that seems to be broken by this change is using a categorical as the merge key:
In [15]: left = pd.DataFrame({'a': [1, 2, 3], 'key': pd.Categorical(['a', 'a', 'b'], categories=['a', 'b', 'c'])})
...: right = pd.DataFrame({'b': [1, 2, 3]}, index=pd.Categorical(['a', 'b', 'c']))
In [16]: left.merge(right, left_on='key', right_index=True, how='right')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-e08f8fc28c75> in <module>
----> 1 left.merge(right, left_on='key', right_index=True, how='right')
~/scipy/pandas/pandas/core/frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
6875 right_on=right_on, left_index=left_index,
6876 right_index=right_index, sort=sort, suffixes=suffixes,
-> 6877 copy=copy, indicator=indicator, validate=validate)
6878
6879 def round(self, decimals=0, *args, **kwargs):
~/scipy/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
46 copy=copy, indicator=indicator,
47 validate=validate)
---> 48 return op.get_result()
49
50
~/scipy/pandas/pandas/core/reshape/merge.py in get_result(self)
544 self.left, self.right)
545
--> 546 join_index, left_indexer, right_indexer = self._get_join_info()
547
548 ldata, rdata = self.left._data, self.right._data
~/scipy/pandas/pandas/core/reshape/merge.py in _get_join_info(self)
762 join_index = self.right.index.take(right_indexer)
763 left_indexer = np.array([-1] * len(join_index))
--> 764 elif self.left_index:
765 if len(self.right) > 0:
766 join_index = self.right.index.take(right_indexer)
~/scipy/pandas/pandas/core/reshape/merge.py in _create_join_index(self, index, other_index, indexer, other_indexer, how)
811
812 # ugh, spaghetti re #733
--> 813 if _any(self.left_on) and _any(self.right_on):
814 for lk, rk in zip(self.left_on, self.right_on):
815 if is_lkey(lk):
ValueError: invalid literal for int() with base 10: 'c'
The above is failing on master now, but works on 0.23.4 / 0.24.0.