REGR: merge is failing with right_index=True and how='right' · Issue #24897 · pandas-dev/pandas (original) (raw)
GeoPandas tests started failing, and this is due to a regression in the merge functionality:
In [13]: left = pd.DataFrame({'a': [1, 2, 3], 'key': [0, 1, 1]})
In [14]: right = pd.DataFrame({'b': [1, 2, 3]})
In [15]: left.merge(right, left_on='key', right_index=True)
Out[15]:
a key b
0 1 0 1
1 2 1 2
2 3 1 2
In [16]: left.merge(right, left_on='key', right_index=True, how='right')
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-16-cb931ec12982> in <module>
----> 1 left.merge(right, left_on='key', right_index=True, how='right')
~/scipy/pandas/pandas/core/frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
6875 right_on=right_on, left_index=left_index,
6876 right_index=right_index, sort=sort, suffixes=suffixes,
-> 6877 copy=copy, indicator=indicator, validate=validate)
6878
6879 def round(self, decimals=0, *args, **kwargs):
~/scipy/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
46 copy=copy, indicator=indicator,
47 validate=validate)
---> 48 return op.get_result()
49
50
~/scipy/pandas/pandas/core/reshape/merge.py in get_result(self)
544 self.left, self.right)
545
--> 546 join_index, left_indexer, right_indexer = self._get_join_info()
547
548 ldata, rdata = self.left._data, self.right._data
~/scipy/pandas/pandas/core/reshape/merge.py in _get_join_info(self)
761 self.right.index,
762 left_indexer,
--> 763 how='right')
764 else:
765 join_index = self.right.index.take(right_indexer)
~/scipy/pandas/pandas/core/reshape/merge.py in _create_join_index(self, index, other_index, indexer, how)
807 # take from other_index instead
808 join_list = join_index.to_numpy()
--> 809 join_list[mask] = other_index.to_numpy()[mask]
810 join_index = Index(join_list, dtype=join_index.dtype,
811 name=join_index.name)
IndexError: boolean index did not match indexed array along dimension 0; dimension is 3 but corresponding boolean dimension is 4
This seems to be related to #24733, which was merged a couple of days ago (after the release candidate).