Better error message when fillna is used with NaN with category dtype · Issue #14021 · pandas-dev/pandas
import numpy as np
tst = pd.DataFrame({'a':[1,2,1,np.nan],'b':[np.nan, np.nan, np.nan, np.nan]}, dtype='category')
tst.fillna(value=tst.median())  # here the second column's median evaluates to np.nan
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
1875 try:
-> 1876 return self._engine.get_loc(key)
1877 except KeyError:
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4027)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3891)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12408)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12359)()
KeyError: <class 'object'>
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-127-6817ed5716b0> in <module>()
2 tst = pd.DataFrame({'a':[1,2,1,np.nan],
3 'b':[np.nan, np.nan, np.nan, np.nan]}, dtype='category')
----> 4 tst.fillna(value=tst.median())
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/frame.py in fillna(self, value, method, axis, inplace, limit, downcast, **kwargs)
2754 self).fillna(value=value, method=method, axis=axis,
2755 inplace=inplace, limit=limit,
-> 2756 downcast=downcast, **kwargs)
2757
2758 @Appender(_shared_docs['shift'] % _shared_doc_kwargs)
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
3164 continue
3165 obj = result[k]
-> 3166 obj.fillna(v, limit=limit, inplace=True)
3167 return result
3168 elif not com.is_list_like(value):
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/series.py in fillna(self, value, method, axis, inplace, limit, downcast, **kwargs)
2350 axis=axis, inplace=inplace,
2351 limit=limit, downcast=downcast,
-> 2352 **kwargs)
2353
2354 @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
3151 new_data = self._data.fillna(value=value, limit=limit,
3152 inplace=inplace,
-> 3153 downcast=downcast)
3154
3155 elif isinstance(value, (dict, com.ABCSeries)):
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/internals.py in fillna(self, **kwargs)
2865
2866 def fillna(self, **kwargs):
-> 2867 return self.apply('fillna', **kwargs)
2868
2869 def downcast(self, **kwargs):
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
2830
2831 kwargs['mgr'] = self
-> 2832 applied = getattr(b, f)(**kwargs)
2833 result_blocks = _extend_blocks(applied, result_blocks)
2834
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/internals.py in fillna(self, value, limit, inplace, downcast, mgr)
1884 values = self.values if inplace else self.values.copy()
1885 values = self._try_coerce_result(values.fillna(value=value,
-> 1886 limit=limit))
1887 return [self.make_block(values=values)]
1888
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/util/decorators.py in wrapper(*args, **kwargs)
89 else:
90 kwargs[new_arg_name] = new_arg_value
---> 91 return func(*args, **kwargs)
92 return wrapper
93 return _deprecate_kwarg
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/core/categorical.py in fillna(self, value, method, limit)
1415 if mask.any():
1416 values = values.copy()
-> 1417 values[mask] = self.categories.get_loc(value)
1418
1419 return Categorical(values, categories=self.categories,
/home/dan/.local/opt/miniconda3/envs/mathbs/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
1876 return self._engine.get_loc(key)
1877 except KeyError:
-> 1878 return self._engine.get_loc(self._maybe_cast_indexer(key))
1879
1880 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4027)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3891)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12408)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12359)()
KeyError: <class 'object'>
When trying the above with a float dtype instead of category, it works as expected (the NaNs are "replaced" by NaNs):
a b
0 1.0 NaN
1 2.0 NaN
2 1.0 NaN
3 1.0 NaN
INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Linux
OS-release: 4.7.0-1-ARCH
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.18.0
nose: 1.3.7
pip: 8.1.1
setuptools: 20.3
Cython: 0.24.1
numpy: 1.10.4
scipy: 0.17.0
statsmodels: None
xarray: None
IPython: 4.1.2
sphinx: 1.4.1
patsy: None
dateutil: 2.5.2
pytz: 2016.3
blosc: None
bottleneck: None
tables: 3.2.2
numexpr: 2.5.1
matplotlib: 1.5.1
openpyxl: None
xlrd: 1.0.0
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.4.1
html5lib: 0.999
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None