BUG: merging on an Integer EA raises · Issue #23020 · pandas-dev/pandas

In [1]: df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'), 'B': 1})
In [2]: pd.merge(df, df, on='B')
Out[2]: 
   A_x  B  A_y
0    1  1    1
1    1  1    2
2    1  1  NaN
3    2  1    1
4    2  1    2
5    2  1  NaN
6  NaN  1    1
7  NaN  1    2
8  NaN  1  NaN

In [3]: pd.merge(df, df, on='B').dtypes
Out[3]: 
A_x    Int64
B      int64
A_y    Int64
dtype: object

In [4]: pd.merge(df, df, on='A').dtypes
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-e77608ca3973> in <module>()
----> 1 pd.merge(df, df, on='A').dtypes

~/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     61                          copy=copy, indicator=indicator,
     62                          validate=validate)
---> 63     return op.get_result()
     64 
     65 

~/pandas/pandas/core/reshape/merge.py in get_result(self)
    562                 self.left, self.right)
    563 
--> 564         join_index, left_indexer, right_indexer = self._get_join_info()
    565 
    566         ldata, rdata = self.left._data, self.right._data

~/pandas/pandas/core/reshape/merge.py in _get_join_info(self)
    771         else:
    772             (left_indexer,
--> 773              right_indexer) = self._get_join_indexers()
    774 
    775             if self.right_index:

~/pandas/pandas/core/reshape/merge.py in _get_join_indexers(self)
    750                                   self.right_join_keys,
    751                                   sort=self.sort,
--> 752                                   how=self.how)
    753 
    754     def _get_join_info(self):

~/pandas/pandas/core/reshape/merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
   1120 
   1121     # get left & right join labels and num. of levels at each location
-> 1122     llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))
   1123 
   1124     # get flat i8 keys from label lists

~/pandas/pandas/core/reshape/merge.py in _factorize_keys(lk, rk, sort)
   1554     elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
   1555         klass = libhashtable.Int64Factorizer
-> 1556         lk = ensure_int64(com.values_from_object(lk))
   1557         rk = ensure_int64(com.values_from_object(rk))
   1558     else:

~/pandas/pandas/_libs/algos_common_helper.pxi in pandas._libs.algos.ensure_int64()

ValueError: cannot convert float NaN to integer