BUG: Putting a Categorical series in a DataFrame with a different index raises IndexError · Issue #8076 · pandas-dev/pandas

>>> import pandas as pd
>>> pd.DataFrame({'x': pd.Series(['a', 'b', 'c'])}, index=pd.date_range('20000101', periods=3))
              x
2000-01-01  NaN
2000-01-02  NaN
2000-01-03  NaN
>>> df = pd.DataFrame({'x': pd.Series(pd.Categorical(['a', 'b', 'c']))}, index=pd.date_range('20000101', periods=3))
>>> df
<repr(<pandas.core.frame.DataFrame at 0x107f09f50>) failed: IndexError: Out of bounds on buffer access (axis 0)>
>>> df.values
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-13-e8bb9a30bd4f> in <module>()
----> 1 df.values

/Users/shoyer/dev/pandas/pandas/core/generic.py in values(self)
   2071         int32.
   2072         """
-> 2073         return self.as_matrix()
   2074 
   2075     @property

/Users/shoyer/dev/pandas/pandas/core/generic.py in as_matrix(self, columns)
   2053         self._consolidate_inplace()
   2054         if self._AXIS_REVERSED:
-> 2055             return self._data.as_matrix(columns).T
   2056         return self._data.as_matrix(columns)
   2057 

/Users/shoyer/dev/pandas/pandas/core/internals.py in as_matrix(self, items)
   2676 
   2677         if self._is_single_block or not self.is_mixed_type:
-> 2678             return mgr.blocks[0].get_values()
   2679         else:
   2680             return mgr._interleave()

/Users/shoyer/dev/pandas/pandas/core/internals.py in get_values(self, dtype)
   1079     def get_values(self, dtype=None):
   1080         """ need to to_dense myself (and always return a ndim sized object) """
-> 1081         values = self.values.to_dense()
   1082         if values.ndim == self.ndim - 1:
   1083             values = values.reshape((1,) + values.shape)

/Users/shoyer/dev/pandas/pandas/core/categorical.py in to_dense(self)
    683     def to_dense(self):
    684         """ Return my 'dense' repr """
--> 685         return np.asarray(self)
    686 
    687     def fillna(self, fill_value=None, method=None, limit=None, **kwargs):

/Users/shoyer/miniconda/envs/pandas-dev/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    458 
    459     """
--> 460     return array(a, dtype, copy=False, order=order)
    461 
    462 def asanyarray(a, dtype=None, order=None):

/Users/shoyer/dev/pandas/pandas/core/categorical.py in __array__(self, dtype)
    484             dtype as categorical.levels.dtype
    485         """
--> 486         ret = com.take_1d(self.levels.values, self._codes)
    487         if dtype and dtype != self.levels.dtype:
    488             return np.asarray(ret, dtype)

/Users/shoyer/dev/pandas/pandas/core/common.py in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
    805                                  axis=axis, mask_info=mask_info)
    806 
--> 807     func(arr, indexer, out, fill_value)
    808 
    809     if flip_order:

/Users/shoyer/dev/pandas/pandas/algos.so in pandas.algos.take_1d_object_object (pandas/algos.c:78943)()

IndexError: Out of bounds on buffer access (axis 0)

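This is on the pandas master branch. For comparison, the plain (non-categorical) Series in the first example is aligned to the new index and yields a column of NaN, whereas the Categorical series produces a DataFrame that raises IndexError as soon as its values are accessed (via `repr(df)` or `df.values`).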