REGR: merge is failing with right_index=True and how='right' · Issue #24897 · pandas-dev/pandas

GeoPandas tests started failing, and this is due to a regression in the merge functionality:

In [13]: left = pd.DataFrame({'a': [1, 2, 3], 'key': [0, 1, 1]})                                                                                                                                                    

In [14]: right = pd.DataFrame({'b': [1, 2, 3]})                                                                                                                                                                     

In [15]: left.merge(right, left_on='key', right_index=True)                                                                                                                                                         
Out[15]: 
   a  key  b
0  1    0  1
1  2    1  2
2  3    1  2

In [16]: left.merge(right, left_on='key', right_index=True, how='right')                                                                                                                                            
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-16-cb931ec12982> in <module>
----> 1 left.merge(right, left_on='key', right_index=True, how='right')

~/scipy/pandas/pandas/core/frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
   6875                      right_on=right_on, left_index=left_index,
   6876                      right_index=right_index, sort=sort, suffixes=suffixes,
-> 6877                      copy=copy, indicator=indicator, validate=validate)
   6878 
   6879     def round(self, decimals=0, *args, **kwargs):

~/scipy/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     46                          copy=copy, indicator=indicator,
     47                          validate=validate)
---> 48     return op.get_result()
     49 
     50 

~/scipy/pandas/pandas/core/reshape/merge.py in get_result(self)
    544                 self.left, self.right)
    545 
--> 546         join_index, left_indexer, right_indexer = self._get_join_info()
    547 
    548         ldata, rdata = self.left._data, self.right._data

~/scipy/pandas/pandas/core/reshape/merge.py in _get_join_info(self)
    761                                                          self.right.index,
    762                                                          left_indexer,
--> 763                                                          how='right')
    764                 else:
    765                     join_index = self.right.index.take(right_indexer)

~/scipy/pandas/pandas/core/reshape/merge.py in _create_join_index(self, index, other_index, indexer, how)
    807                 # take from other_index instead
    808                 join_list = join_index.to_numpy()
--> 809                 join_list[mask] = other_index.to_numpy()[mask]
    810                 join_index = Index(join_list, dtype=join_index.dtype,
    811                                    name=join_index.name)

IndexError: boolean index did not match indexed array along dimension 0; dimension is 3 but corresponding boolean dimension is 4

This seems to be related to #24733, which was merged a couple of days ago (after the RC).