ENH: support EA types in read_csv · Issue #23228 · pandas-dev/pandas (original) (raw)
In [3]: df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int64'), 'A': [1, 2, 1]})
...: df
...:
Out[3]:
Int A
0 1 1
1 2 2
2 3 1
In [4]: data = df.to_csv(index=False)
In [5]: data
Out[5]: 'Int,A\n1,1\n2,2\n3,1\n'
In [6]: from io import StringIO
In [7]: pd.read_csv(StringIO(data), dtype={'Int': 'Int64'})
~/pandas/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows()
968
969 self._start_clock()
--> 970 columns = self._convert_column_data(rows=rows,
971 footer=footer,
972 upcast_na=True)
~/pandas/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._convert_column_data()
1096
1097 # Should return as the desired dtype (inferred or specified)
-> 1098 col_res, na_count = self._convert_tokens(
1099 i, start, end, name, na_filter, na_hashset,
1100 na_flist, col_dtype)
~/pandas/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._convert_tokens()
1121
1122 if col_dtype is not None:
-> 1123 col_res, na_count = self._convert_with_dtype(
1124 col_dtype, i, start, end, na_filter,
1125 1, na_hashset, na_flist)
~/pandas/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._convert_with_dtype()
1249 "using parse_dates instead".format(dtype=dtype))
1250 else:
-> 1251 raise TypeError("the dtype {dtype} is not "
1252 "supported for parsing".format(dtype=dtype))
1253
TypeError: the dtype Int64 is not supported for parsing
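We already support Categorical; it would be nice to have a general interface for extension array (EA) dtypes. Note that passing the dtype class instead of its string name does not raise, but the column silently comes back as object dtype rather than Int64: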
In [8]: from pandas.core.arrays.integer import Int64Dtype
In [9]: pd.read_csv(StringIO(data), dtype={'Int': Int64Dtype})
Out[9]:
Int A
0 1 1
1 2 2
2 3 1
In [10]: pd.read_csv(StringIO(data), dtype={'Int': Int64Dtype}).dtypes
Out[10]:
Int object
A int64
dtype: object