BUG: Joining on non-unique PeriodIndex fails · Issue #16871 · pandas-dev/pandas (original) (raw)

I originally reported this in #16541, but I guess it fell through the cracks. Here is the stack trace:

TypeError                                 Traceback (most recent call last)
<ipython-input-2-c7c6bdf18c3f> in <module>()
      3                      index=perindex, columns=['pnum'])
      4 df2 = pd.concat([perdf, perdf])
----> 5 perdf.merge(df2, left_index=True, right_index=True, how='outer')

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
   4720                      right_on=right_on, left_index=left_index,
   4721                      right_index=right_index, sort=sort, suffixes=suffixes,
-> 4722                      copy=copy, indicator=indicator)
   4723 
   4724     def round(self, decimals=0, *args, **kwargs):

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
     52                          right_index=right_index, sort=sort, suffixes=suffixes,
     53                          copy=copy, indicator=indicator)
---> 54     return op.get_result()
     55 
     56 

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in get_result(self)
    567                 self.left, self.right)
    568 
--> 569         join_index, left_indexer, right_indexer = self._get_join_info()
    570 
    571         ldata, rdata = self.left._data, self.right._data

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _get_join_info(self)
    720             join_index, left_indexer, right_indexer = \
    721                 left_ax.join(right_ax, how=self.how, return_indexers=True,
--> 722                              sort=self.sort)
    723         elif self.right_index and self.how == 'left':
    724             join_index, left_indexer, right_indexer = \

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\period.py in join(self, other, how, level, return_indexers, sort)
    929         result = Int64Index.join(self, other, how=how, level=level,
    930                                  return_indexers=return_indexers,
--> 931                                  sort=sort)
    932 
    933         if return_indexers:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\base.py in join(self, other, how, level, return_indexers, sort)
   3044             else:
   3045                 return self._join_non_unique(other, how=how,
-> 3046                                              return_indexers=return_indexers)
   3047         elif self.is_monotonic and other.is_monotonic:
   3048             try:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\base.py in _join_non_unique(self, other, how, return_indexers)
   3125         left_idx, right_idx = _get_join_indexers([self.values],
   3126                                                  [other._values], how=how,
-> 3127                                                  sort=True)
   3128 
   3129         left_idx = _ensure_platform_int(left_idx)

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
    980 
    981     # get left & right join labels and num. of levels at each location
--> 982     llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))
    983 
    984     # get flat i8 keys from label lists

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _factorize_keys(lk, rk, sort)
   1410     if sort:
   1411         uniques = rizer.uniques.to_array()
-> 1412         llab, rlab = _sort_labels(uniques, llab, rlab)
   1413 
   1414     # NA group

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _sort_labels(uniques, left, right)
   1436     labels = np.concatenate([left, right])
   1437 
-> 1438     _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1)
   1439     new_labels = _ensure_int64(new_labels)
   1440     new_left, new_right = new_labels[:l], new_labels[l:]

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\algorithms.py in safe_sort(values, labels, na_sentinel, assume_unique)
    481     if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer':
    482         # unorderable in py3 if mixed str/int
--> 483         ordered = sort_mixed(values)
    484     else:
    485         try:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\algorithms.py in sort_mixed(values)
    474         str_pos = np.array([isinstance(x, string_types) for x in values],
    475                            dtype=bool)
--> 476         nums = np.sort(values[~str_pos])
    477         strs = np.sort(values[str_pos])
    478         return _ensure_object(np.concatenate([nums, strs]))

C:\Anaconda3\envs\py36\lib\site-packages\numpy\core\fromnumeric.py in sort(a, axis, kind, order)
    820     else:
    821         a = asanyarray(a).copy(order="K")
--> 822     a.sort(axis=axis, kind=kind, order=order)
    823     return a
    824 

pandas/_libs/period.pyx in pandas._libs.period._Period.__richcmp__ (pandas\_libs\period.c:12067)()

TypeError: Cannot compare type 'Period' with type 'int'