BUG: Series construction with an extension array (EA) dtype and index but no data fails · Issue #26469 · pandas-dev/pandas

In [8]: pd.Series(None, index=[1, 2, 3], dtype='Int64')                                                                                                                                                             
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/scipy/pandas/pandas/core/internals/construction.py in _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure)
    691         if is_integer_dtype(dtype):
--> 692             subarr = maybe_cast_to_integer_array(arr, dtype)
    693 

~/scipy/pandas/pandas/core/dtypes/cast.py in maybe_cast_to_integer_array(arr, dtype, copy)
   1311         if not hasattr(arr, "astype"):
-> 1312             casted = np.array(arr, dtype=dtype, copy=copy)
   1313         else:

TypeError: data type not understood

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-8-9447295feee6> in <module>
----> 1 pd.Series(None, index=[1, 2, 3], dtype='Int64')

~/scipy/pandas/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    202                 data = data._data
    203             elif isinstance(data, dict):
--> 204                 data, index = self._init_dict(data, index, dtype)
    205                 dtype = None
    206                 copy = False

~/scipy/pandas/pandas/core/series.py in _init_dict(self, data, index, dtype)
    295 
    296         # Input is now list-like, so rely on "standard" construction:
--> 297         s = Series(values, index=keys, dtype=dtype)
    298 
    299         # Now we just make sure the order is respected, if any

~/scipy/pandas/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    253             else:
    254                 data = sanitize_array(data, index, dtype, copy,
--> 255                                       raise_cast_failure=True)
    256 
    257                 data = SingleBlockManager(data, index, fastpath=True)

~/scipy/pandas/pandas/core/internals/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure)
    620         subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    621     else:
--> 622         subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)
    623 
    624     # scalar like, GH

~/scipy/pandas/pandas/core/internals/construction.py in _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure)
    711             # create an extension array from its dtype
    712             array_type = dtype.construct_array_type()._from_sequence
--> 713             subarr = array_type(arr, dtype=dtype, copy=copy)
    714         elif dtype is not None and raise_cast_failure:
    715             raise

~/scipy/pandas/pandas/core/arrays/integer.py in _from_sequence(cls, scalars, dtype, copy)
    305     @classmethod
    306     def _from_sequence(cls, scalars, dtype=None, copy=False):
--> 307         return integer_array(scalars, dtype=dtype, copy=copy)
    308 
    309     @classmethod

~/scipy/pandas/pandas/core/arrays/integer.py in integer_array(values, dtype, copy)
    110     TypeError if incompatible types
    111     """
--> 112     values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
    113     return IntegerArray(values, mask)
    114 

~/scipy/pandas/pandas/core/arrays/integer.py in coerce_to_array(values, dtype, mask, copy)
    202 
    203     if not values.ndim == 1:
--> 204         raise TypeError("values must be a 1D list-like")
    205     if not mask.ndim == 1:
    206         raise TypeError("mask must be a 1D list-like")

TypeError: values must be a 1D list-like

The construction above fails for extension array (EA) dtypes, while the same call works fine for non-EA dtypes.

The problem is that the scalar `None` / `np.nan` is eventually passed to the `_from_sequence` method, which expects a list-like. I don't think we should require extension array authors to handle this case in their own `_from_sequence` implementations; rather, we should make sure that we never pass a scalar to it in the first place.