BUG: assigning Series.array / PandasArray to column fails · Issue #26390 · pandas-dev/pandas (original) (raw)

Assigning a PandasArray (and therefore also the result of `df['a'].array`) of the correct length as a new column fails:

In [1]: df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd']})

In [2]: df['c'] = pd.array([1, 2, None, 3])

KeyError Traceback (most recent call last) ~/scipy/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 2672 try: -> 2673 return self._engine.get_loc(key) 2674 except KeyError:

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'c'

During handling of the above exception, another exception occurred:

KeyError Traceback (most recent call last) ~/scipy/pandas/pandas/core/internals/managers.py in set(self, item, value) 1048 try: -> 1049 loc = self.items.get_loc(item) 1050 except KeyError:

~/scipy/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 2674 except KeyError: -> 2675 return self._engine.get_loc(self._maybe_cast_indexer(key)) 2676 indexer = self.get_indexer([key], method=method, tolerance=tolerance)

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'c'

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last) <ipython-input-2-…> in <module> ----> 1 df['c'] = pd.array([1, 2, None, 3])

~/scipy/pandas/pandas/core/frame.py in __setitem__(self, key, value) 3334 else: 3335 # set column -> 3336 self._set_item(key, value) 3337 3338 def _setitem_slice(self, key, value):

~/scipy/pandas/pandas/core/frame.py in _set_item(self, key, value) 3410 self._ensure_valid_index(value) 3411 value = self._sanitize_column(key, value) -> 3412 NDFrame._set_item(self, key, value) 3413 3414 # check if we are modifying a copy

~/scipy/pandas/pandas/core/generic.py in _set_item(self, key, value) 3232 3233 def _set_item(self, key, value): -> 3234 self._data.set(key, value) 3235 self._clear_item_cache() 3236

~/scipy/pandas/pandas/core/internals/managers.py in set(self, item, value) 1050 except KeyError: 1051 # This item wasn't present, just insert at end -> 1052 self.insert(len(self.items), item, value) 1053 return 1054

~/scipy/pandas/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates) 1152 1153 block = make_block(values=value, ndim=self.ndim, -> 1154 placement=slice(loc, loc + 1)) 1155 1156 for blkno, count in _fast_count_smallints(self._blknos[loc:]):

~/scipy/pandas/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype, fastpath) 3052 values = DatetimeArray._simple_new(values, dtype=dtype) 3053 -> 3054 return klass(values, ndim=ndim, placement=placement) 3055 3056

~/scipy/pandas/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim) 2584 values = np.array(values, dtype=object) 2585 -> 2586 super().__init__(values, ndim=ndim, placement=placement) 2587 2588 @property

~/scipy/pandas/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim) 74 75 def __init__(self, values, placement, ndim=None): ---> 76 self.ndim = self._check_ndim(values, ndim) 77 self.mgr_locs = placement 78 self.values = values

~/scipy/pandas/pandas/core/internals/blocks.py in _check_ndim(self, values, ndim) 111 msg = ("Wrong number of dimensions. values.ndim != ndim " 112 "[{} != {}]") --> 113 raise ValueError(msg.format(values.ndim, ndim)) 114 115 return ndim

ValueError: Wrong number of dimensions. values.ndim != ndim [1 != 2]

Note that this only fails for the PandasArray types, i.e. when a FloatBlock, IntBlock, etc. is created (these blocks expect 2D data). It does not fail when an ExtensionBlock is created, as is done for an "actual" ExtensionArray.