BUG: Error writing DataFrame with categorical type column and interval data to a CSV file · Issue #46297 · pandas-dev/pandas (original) (raw)
I get the following error message when trying to run the example above. The error seems to be caused by writing a categorical column whose categories are intervals to a CSV file.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [41], in <module>
----> 1 df.to_csv("test.csv")
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\generic.py:3563, in NDFrame.to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)
3552 df = self if isinstance(self, ABCDataFrame) else self.to_frame()
3554 formatter = DataFrameFormatter(
3555 frame=df,
3556 header=header,
(...)
3560 decimal=decimal,
3561 )
-> 3563 return DataFrameRenderer(formatter).to_csv(
3564 path_or_buf,
3565 line_terminator=line_terminator,
3566 sep=sep,
3567 encoding=encoding,
3568 errors=errors,
3569 compression=compression,
3570 quoting=quoting,
3571 columns=columns,
3572 index_label=index_label,
3573 mode=mode,
3574 chunksize=chunksize,
3575 quotechar=quotechar,
3576 date_format=date_format,
3577 doublequote=doublequote,
3578 escapechar=escapechar,
3579 storage_options=storage_options,
3580 )
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\io\formats\format.py:1180, in DataFrameRenderer.to_csv(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)
1159 created_buffer = False
1161 csv_formatter = CSVFormatter(
1162 path_or_buf=path_or_buf,
1163 line_terminator=line_terminator,
(...)
1178 formatter=self.fmt,
1179 )
-> 1180 csv_formatter.save()
1182 if created_buffer:
1183 assert isinstance(path_or_buf, StringIO)
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\io\formats\csvs.py:261, in CSVFormatter.save(self)
241 with get_handle(
242 self.filepath_or_buffer,
243 self.mode,
(...)
249
250 # Note: self.encoding is irrelevant here
251 self.writer = csvlib.writer(
252 handles.handle,
253 lineterminator=self.line_terminator,
(...)
258 quotechar=self.quotechar,
259 )
--> 261 self._save()
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\io\formats\csvs.py:266, in CSVFormatter._save(self)
264 if self._need_to_save_header:
265 self._save_header()
--> 266 self._save_body()
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\io\formats\csvs.py:304, in CSVFormatter._save_body(self)
302 if start_i >= end_i:
303 break
--> 304 self._save_chunk(start_i, end_i)
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\io\formats\csvs.py:311, in CSVFormatter._save_chunk(self, start_i, end_i)
308 slicer = slice(start_i, end_i)
309 df = self.obj.iloc[slicer]
--> 311 res = df._mgr.to_native_types(**self._number_format)
312 data = [res.iget_values(i) for i in range(len(res.items))]
314 ix = self.data_index[slicer]._format_native_types(**self._number_format)
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\internals\managers.py:473, in BaseBlockManager.to_native_types(self, **kwargs)
468 def to_native_types(self: T, **kwargs) -> T:
469 """
470 Convert values to native types (strings / python objects) that are used
471 in formatting (repr / csv).
472 """
--> 473 return self.apply("to_native_types", **kwargs)
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\internals\managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\internals\blocks.py:636, in Block.to_native_types(self, na_rep, quoting, **kwargs)
633 @final
634 def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
635 """convert to our native types format"""
--> 636 result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
637 return self.make_block(result)
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\internals\blocks.py:2148, in to_native_types(values, na_rep, quoting, float_format, decimal, **kwargs)
2145 """convert to our native types format"""
2146 if isinstance(values, Categorical):
2147 # GH#40754 Convert categorical datetimes to datetime array
-> 2148 values = take_nd(
2149 values.categories._values,
2150 ensure_platform_int(values._codes),
2151 fill_value=na_rep,
2152 )
2154 values = ensure_wrapped_if_datetimelike(values)
2156 if isinstance(values, (DatetimeArray, TimedeltaArray)):
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\array_algos\take.py:114, in take_nd(arr, indexer, axis, fill_value, allow_fill)
109 arr = cast("NDArrayBackedExtensionArray", arr)
110 return arr.take(
111 indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
112 )
--> 114 return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
116 arr = np.asarray(arr)
117 return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\arrays\interval.py:1060, in IntervalArray.take(self, indices, allow_fill, fill_value, axis, **kwargs)
1058 fill_left = fill_right = fill_value
1059 if allow_fill:
-> 1060 fill_left, fill_right = self._validate_scalar(fill_value)
1062 left_take = take(
1063 self._left, indices, allow_fill=allow_fill, fill_value=fill_left
1064 )
1065 right_take = take(
1066 self._right, indices, allow_fill=allow_fill, fill_value=fill_right
1067 )
File ~\Anaconda3\envs\wedev\lib\site-packages\pandas\core\arrays\interval.py:1102, in IntervalArray._validate_scalar(self, value)
1100 left = right = value
1101 else:
-> 1102 raise TypeError(
1103 "can only insert Interval objects and NA into an IntervalArray"
1104 )
1105 return left, right
TypeError: can only insert Interval objects and NA into an IntervalArray
I expect writing to a CSV file to succeed, as it does if I replace `astype("category")` with `astype("object")` in the example above.