BUG: Reading fails when dtype is defined with bool[pyarrow]
· Issue #53390 · pandas-dev/pandas (original) (raw)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:912, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
899 kwds_defaults = _refine_defaults_read(
900 dialect,
901 delimiter,
(...)
908 dtype_backend=dtype_backend,
909 )
910 kwds.update(kwds_defaults)
--> 912 return _read(filepath_or_buffer, kwds)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:583, in _read(filepath_or_buffer, kwds)
580 return parser
582 with parser:
--> 583 return parser.read(nrows)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1704, in TextFileReader.read(self, nrows)
1697 nrows = validate_integer("nrows", nrows)
1698 try:
1699 # error: "ParserBase" has no attribute "read"
1700 (
1701 index,
1702 columns,
1703 col_dict,
-> 1704 ) = self._engine.read( # type: ignore[attr-defined]
1705 nrows
1706 )
1707 except Exception:
1708 self.close()
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:234, in CParserWrapper.read(self, nrows)
232 try:
233 if self.low_memory:
--> 234 chunks = self._reader.read_low_memory(nrows)
235 # destructive to chunks
236 data = _concatenate_chunks(chunks)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:812, in pandas._libs.parsers.TextReader.read_low_memory()
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:889, in pandas._libs.parsers.TextReader._read_rows()
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:1034, in pandas._libs.parsers.TextReader._convert_column_data()
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:1073, in pandas._libs.parsers.TextReader._convert_tokens()
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:1173, in pandas._libs.parsers.TextReader._convert_with_dtype()
TypeError: _from_sequence_of_strings() got an unexpected keyword argument 'true_values'
In the above example, I would expect the CSV data to be loaded into a DataFrame with PyArrow-backed types. This works when the dtype is a PyArrow-backed string, int, or float type; however, it raises an error when bool[pyarrow] is specified.