BUG: merging on an Integer EA raises · Issue #23020 · pandas-dev/pandas
In [1]: df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'), 'B': 1})
In [2]: pd.merge(df, df, on='B')
Out[2]:
A_x B A_y
0 1 1 1
1 1 1 2
2 1 1 NaN
3 2 1 1
4 2 1 2
5 2 1 NaN
6 NaN 1 1
7 NaN 1 2
8 NaN 1 NaN
In [3]: pd.merge(df, df, on='B').dtypes
Out[3]:
A_x Int64
B int64
A_y Int64
dtype: object
In [4]: pd.merge(df, df, on='A').dtypes
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-e77608ca3973> in <module>()
----> 1 pd.merge(df, df, on='A').dtypes
~/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
61 copy=copy, indicator=indicator,
62 validate=validate)
---> 63 return op.get_result()
64
65
~/pandas/pandas/core/reshape/merge.py in get_result(self)
562 self.left, self.right)
563
--> 564 join_index, left_indexer, right_indexer = self._get_join_info()
565
566 ldata, rdata = self.left._data, self.right._data
~/pandas/pandas/core/reshape/merge.py in _get_join_info(self)
771 else:
772 (left_indexer,
--> 773 right_indexer) = self._get_join_indexers()
774
775 if self.right_index:
~/pandas/pandas/core/reshape/merge.py in _get_join_indexers(self)
750 self.right_join_keys,
751 sort=self.sort,
--> 752 how=self.how)
753
754 def _get_join_info(self):
~/pandas/pandas/core/reshape/merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
1120
1121 # get left & right join labels and num. of levels at each location
-> 1122 llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))
1123
1124 # get flat i8 keys from label lists
~/pandas/pandas/core/reshape/merge.py in _factorize_keys(lk, rk, sort)
1554 elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
1555 klass = libhashtable.Int64Factorizer
-> 1556 lk = ensure_int64(com.values_from_object(lk))
1557 rk = ensure_int64(com.values_from_object(rk))
1558 else:
~/pandas/pandas/_libs/algos_common_helper.pxi in pandas._libs.algos.ensure_int64()
ValueError: cannot convert float NaN to integer