BUG: pivot_table with margins=True fails for categorical dtype (original) (raw)

First, an example that works as expected (non-categorical):

In [22]: pd.__version__ Out[22]: '0.16.2'

In [23]: data = pd.DataFrame({'x': np.arange(99), 'y': np.arange(99) // 50, 'z': np.arange(99) % 3})

In [24]: data.pivot_table('x', 'y', 'z')
Out[24]:
z     0     1     2
y
0  24.0  25.0  24.5
1  73.5  74.5  74.0

In [25]: data.pivot_table('x', 'y', 'z', margins=True)
Out[25]:
z       0     1     2   All
y
0    24.0  25.0  24.5  24.5
1    73.5  74.5  74.0  74.0
All  48.0  49.0  50.0  49.0

Now convert y and z to categories; pivot_table still works without margins, but fails with margins=True:

In [27]: data.y = data.y.astype('category')

In [28]: data.z = data.z.astype('category')

In [29]: data.pivot_table('x', 'y', 'z')
Out[29]:
z     0     1     2
y
0  24.0  25.0  24.5
1  73.5  74.5  74.0

In [32]: data.pivot_table('x', 'y', 'z', margins=True)

KeyError Traceback (most recent call last) /Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in set(self, item, value, check) 2979 try: -> 2980 loc = self.items.get_loc(item) 2981 except KeyError:

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in get_loc(self, key, method) 5072 key = tuple(map(_maybe_str_to_time_stamp, key, self.levels)) -> 5073 return self._engine.get_loc(key) 5074

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12280)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12231)()

KeyError: ('x', 'All')

During handling of the above exception, another exception occurred:

TypeError Traceback (most recent call last) in <module>() ----> 1 data.pivot_table('x', 'y', 'z', margins=True)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna) 141 if margins: 142 table = _add_margins(table, data, values, rows=index, --> 143 cols=columns, aggfunc=aggfunc) 144 145 # discard the top level

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in _add_margins(table, data, values, rows, cols, aggfunc) 167 168 if values: --> 169 marginal_result_set = _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin) 170 if not isinstance(marginal_result_set, tuple): 171 return marginal_result_set

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin) 236 # we are going to mutate this, so need to copy! 237 piece = piece.copy() --> 238 piece[all_key] = margin[key] 239 240 table_pieces.append(piece)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/frame.py in __setitem__(self, key, value) 2125 else: 2126 # set column -> 2127 self._set_item(key, value) 2128 2129 def _setitem_slice(self, key, value):

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/frame.py in _set_item(self, key, value) 2203 self._ensure_valid_index(value) 2204 value = self._sanitize_column(key, value) -> 2205 NDFrame._set_item(self, key, value) 2206 2207 # check if we are modifying a copy

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/generic.py in _set_item(self, key, value) 1194 1195 def _set_item(self, key, value): -> 1196 self._data.set(key, value) 1197 self._clear_item_cache() 1198

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in set(self, item, value, check) 2981 except KeyError: 2982 # This item wasn't present, just insert at end -> 2983 self.insert(len(self.items), item, value) 2984 return 2985

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates) 3100 self._blknos = np.insert(self._blknos, loc, len(self.blocks)) 3101 -> 3102 self.axes[0] = self.items.insert(loc, item) 3103 3104 self.blocks += (block,)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in insert(self, loc, item) 5583 # other labels 5584 lev_loc = len(level) -> 5585 level = level.insert(lev_loc, k) 5586 else: 5587 lev_loc = level.get_loc(k)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in insert(self, loc, item) 3217 code = self.categories.get_indexer([item]) 3218 if (code == -1): -> 3219 raise TypeError("cannot insert an item into a CategoricalIndex that is not already an existing category") 3220 3221 codes = self.codes

TypeError: cannot insert an item into a CategoricalIndex that is not already an existing category