Better error message when fillna is used with NaN with category dtype · Issue #14021 · pandas-dev/pandas (original) (raw)

import numpy as np
tst = pd.DataFrame({'a':[1,2,1,np.nan],'b':[np.nan, np.nan, np.nan, np.nan]}, dtype='category')
tst.fillna(value=tst.median())  # here second column median is evaluated to np.nan
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   1875             try:
-> 1876                 return self._engine.get_loc(key)
   1877             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4027)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3891)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12408)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12359)()

KeyError: <class 'object'>

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-127-6817ed5716b0> in <module>()
      2 tst = pd.DataFrame({'a':[1,2,1,np.nan],
      3                    'b':[np.nan, np.nan, np.nan, np.nan]}, dtype='category')
----> 4 tst.fillna(value=tst.median())

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/frame.py in fillna(self, value, method, axis, inplace, limit, downcast, **kwargs)
   2754                      self).fillna(value=value, method=method, axis=axis,
   2755                                   inplace=inplace, limit=limit,
-> 2756                                   downcast=downcast, **kwargs)
   2757 
   2758     @Appender(_shared_docs['shift'] % _shared_doc_kwargs)

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
   3164                         continue
   3165                     obj = result[k]
-> 3166                     obj.fillna(v, limit=limit, inplace=True)
   3167                 return result
   3168             elif not com.is_list_like(value):

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/series.py in fillna(self, value, method, axis, inplace, limit, downcast, **kwargs)
   2350                                           axis=axis, inplace=inplace,
   2351                                           limit=limit, downcast=downcast,
-> 2352                                           **kwargs)
   2353 
   2354     @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
   3151                 new_data = self._data.fillna(value=value, limit=limit,
   3152                                              inplace=inplace,
-> 3153                                              downcast=downcast)
   3154 
   3155             elif isinstance(value, (dict, com.ABCSeries)):

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/internals.py in fillna(self, **kwargs)
   2865 
   2866     def fillna(self, **kwargs):
-> 2867         return self.apply('fillna', **kwargs)
   2868 
   2869     def downcast(self, **kwargs):

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
   2830 
   2831             kwargs['mgr'] = self
-> 2832             applied = getattr(b, f)(**kwargs)
   2833             result_blocks = _extend_blocks(applied, result_blocks)
   2834 

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/internals.py in fillna(self, value, limit, inplace, downcast, mgr)
   1884         values = self.values if inplace else self.values.copy()
   1885         values = self._try_coerce_result(values.fillna(value=value,
-> 1886                                                        limit=limit))
   1887         return [self.make_block(values=values)]
   1888 

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/util/decorators.py in wrapper(*args, **kwargs)
     89                 else:
     90                     kwargs[new_arg_name] = new_arg_value
---> 91             return func(*args, **kwargs)
     92         return wrapper
     93     return _deprecate_kwarg

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/categorical.py in fillna(self, value, method, limit)
   1415             if mask.any():
   1416                 values = values.copy()
-> 1417                 values[mask] = self.categories.get_loc(value)
   1418 
   1419         return Categorical(values, categories=self.categories,

/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   1876                 return self._engine.get_loc(key)
   1877             except KeyError:
-> 1878                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   1879 
   1880         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4027)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3891)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12408)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12359)()

KeyError: <class 'object'>

When the same code is run with a float dtype instead of category, it works as expected (NaNs are "replaced" by NaNs):

     a   b
0  1.0 NaN
1  2.0 NaN
2  1.0 NaN
3  1.0 NaN
INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Linux
OS-release: 4.7.0-1-ARCH
machine: x86_64
processor: 
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8

pandas: 0.18.0
nose: 1.3.7
pip: 8.1.1
setuptools: 20.3
Cython: 0.24.1
numpy: 1.10.4
scipy: 0.17.0
statsmodels: None
xarray: None
IPython: 4.1.2
sphinx: 1.4.1
patsy: None
dateutil: 2.5.2
pytz: 2016.3
blosc: None
bottleneck: None
tables: 3.2.2
numexpr: 2.5.1
matplotlib: 1.5.1
openpyxl: None
xlrd: 1.0.0
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.4.1
html5lib: 0.999
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None