Bug: Pivot fails for MultiIndex if existing index is used. · Issue #23955 · pandas-dev/pandas (original) (raw)

Code Sample, a copy-pastable example

df = pd.DataFrame([['A', 'A1', 'label1', 1], ['A', 'A2', 'label2', 2], ['B', 'A1', 'label1', 3], ['B', 'A2', 'label2', 4]], columns=['index_1', 'index_2', 'label', 'value'])
df = df.set_index(['index_1', 'index_2'])

pivoted_df = df.pivot(index=None, columns='label', values = 'value')

Problem description

The pivot function raises `NotImplementedError: isna is not defined for MultiIndex` when `index` is set to None and the DataFrame's existing index is a MultiIndex.


NotImplementedError Traceback (most recent call last) in () 2 pivoted_df = df.pivot(index=None, 3 columns='label', ----> 4 values = 'value')

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values) 5192 """ 5193 from pandas.core.reshape.reshape import pivot -> 5194 return pivot(self, index=index, columns=columns, values=values) 5195 5196 _shared_docs['pivot_table'] = """

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in pivot(self, index, columns, values) 404 else: 405 index = self[index] --> 406 index = MultiIndex.from_arrays([index, self[columns]]) 407 408 if is_list_like(values) and not isinstance(values, tuple):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\multi.py in from_arrays(cls, arrays, sortorder, names) 1272 from pandas.core.arrays.categorical import _factorize_from_iterables 1273 -> 1274 labels, levels = _factorize_from_iterables(arrays) 1275 if names is None: 1276 names = [getattr(arr, "name", None) for arr in arrays]

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterables(iterables) 2541 # For consistency, it should return a list of 2 lists. 2542 return [[], []] -> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in <listcomp>(.0) 2541 # For consistency, it should return a list of 2 lists. 2542 return [[], []] -> 2543 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in _factorize_from_iterable(values) 2513 codes = values.codes 2514 else: -> 2515 cat = Categorical(values, ordered=True) 2516 categories = cat.categories 2517 codes = cat.codes

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py in __init__(self, values, categories, ordered, dtype, fastpath) 359 360 # we're inferring from values --> 361 dtype = CategoricalDtype(categories, dtype.ordered) 362 363 elif is_categorical_dtype(values):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in __init__(self, categories, ordered) 136 137 def __init__(self, categories=None, ordered=None): --> 138 self._finalize(categories, ordered, fastpath=False) 139 140 @classmethod

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in _finalize(self, categories, ordered, fastpath) 161 if categories is not None: 162 categories = self.validate_categories(categories, --> 163 fastpath=fastpath) 164 165 self._categories = categories

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\dtypes.py in validate_categories(categories, fastpath) 318 if not fastpath: 319 --> 320 if categories.hasnans: 321 raise ValueError('Categorial categories cannot be null') 322

pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in hasnans(self) 2237 """ return if I have any nans; enables various perf speedups """ 2238 if self._can_hold_na: -> 2239 return self._isnan.any() 2240 else: 2241 return False

pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _isnan(self) 2218 """ return if each value is nan""" 2219 if self._can_hold_na: -> 2220 return isna(self) 2221 else: 2222 # shouldn't reach to this condition by checking hasnans beforehand

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in isna(obj) 104 Name: 1, dtype: bool 105 """ --> 106 return _isna(obj) 107 108

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\dtypes\missing.py in _isna_new(obj) 115 # hack (for now) because MI registers as ndarray 116 elif isinstance(obj, ABCMultiIndex): --> 117 raise NotImplementedError("isna is not defined for MultiIndex") 118 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, 119 ABCExtensionArray)):

NotImplementedError: isna is not defined for MultiIndex

Expected Output

index_1 index_2 label1 label2
A A1 1.0 NaN
A2 NaN 2.0
B A1 3.0 NaN
A2 NaN 4.0

Output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 3.6.5.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 85 Stepping 4, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.23.4
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: 6.4.0
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: None
bottleneck: 1.2.1
tables: 3.4.3
numexpr: 2.6.5
feather: None
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None