ValueError when casting 1d data to MultiIndex column with incomplete labels and all data is object (wrong _is_consolidated output?) · Issue #27841 · pandas-dev/pandas (original) (raw)
Code Sample, a copy-pastable example if possible
In [2]: datum = '1'
In [3]: df = pd.DataFrame(datum, index=list('abcd'),
   ...:                   columns=pd.MultiIndex.from_product([['Main'], ('another', 'one')]))
In [4]: df['labels'] = 'a'
In [5]: df.loc[:, 'labels'] = df.index
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>
----> 1 df.loc[:, 'labels'] = df.index
~/nobackup/repo/pandas/pandas/core/indexing.py in __setitem__(self, key, value) 201 key = com.apply_if_callable(key, self.obj) 202 indexer = self._get_setitem_indexer(key) --> 203 self._setitem_with_indexer(indexer, value) 204 205 def _validate_key(self, key, axis: int):
~/nobackup/repo/pandas/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value) 572 # actually do the set 573 self.obj._consolidate_inplace() --> 574 self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) 575 self.obj._maybe_update_cacher(clear=True) 576
~/nobackup/repo/pandas/pandas/core/internals/managers.py in setitem(self, **kwargs) 558 559 def setitem(self, **kwargs): --> 560 return self.apply("setitem", **kwargs) 561 562 def putmask(self, **kwargs):
~/nobackup/repo/pandas/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs) 436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy) 437 --> 438 applied = getattr(b, f)(**kwargs) 439 result_blocks = _extend_blocks(applied, result_blocks) 440
~/nobackup/repo/pandas/pandas/core/internals/blocks.py in setitem(self, indexer, value) 890 # set 891 else: --> 892 values[indexer] = value 893 894 if transpose:
ValueError: could not broadcast input array from shape (4) into shape (4,1)
Problem description
Replace `datum = '1'` with `datum = 1` and everything works.
The problem might be related to the fact that the frame holds two separate object blocks, yet `_is_consolidated` still reports `True`:
In [5]: df._data.blocks
Out[5]:
(ObjectBlock: slice(0, 2, 1), 2 x 4, dtype: object,
ObjectBlock: slice(2, 3, 1), 1 x 4, dtype: object)
In [6]: df._data._is_consolidated
Out[6]: True
This is a regression, since it used to work in code of mine, but unfortunately I'm not sure which version this was working with.
Vaguely related to #27286.
Expected Output
None, the assignment should just work as it does when data is numeric.
Output of pd.show_versions()
INSTALLED VERSIONS
commit : None
python : 3.7.3.final.0
python-bits : 64
OS : Linux
OS-release : 4.19.0-5-amd64
machine : x86_64
processor :
byteorder : little
LC_ALL : None
LANG : it_IT.UTF-8
LOCALE : it_IT.UTF-8
pandas : 0.25.0+141.g0227e69bb
numpy : 1.16.4
pytz : 2019.1
dateutil : 2.8.0
pip : 18.1
setuptools : 41.0.1
Cython : 0.29.2
pytest : 4.6.3
hypothesis : 3.71.11
sphinx : 1.8.4
blosc : None
feather : None
xlsxwriter : 1.1.2
lxml.etree : 4.3.2
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.10.1
IPython : 7.5.0
pandas_datareader: v0.7.0+52.gb1b54d6.dirty
bs4 : 4.7.1
bottleneck : 1.2.1
fastparquet : None
gcsfs : None
lxml.etree : 4.3.2
matplotlib : 3.0.2
numexpr : 2.6.9
odfpy : None
openpyxl : 2.4.9
pandas_gbq : None
pyarrow : None
pytables : None
s3fs : None
scipy : 1.1.0
sqlalchemy : 1.2.18
tables : 3.4.4
xarray : None
xlrd : 1.1.0
xlwt : 1.3.0
xlsxwriter : 1.1.2