BUG: Reading fails when dtype is defined with bool[pyarrow] · Issue #53390 · pandas-dev/pandas

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:912, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
    899 kwds_defaults = _refine_defaults_read(
    900     dialect,
    901     delimiter,
   (...)
    908     dtype_backend=dtype_backend,
    909 )
    910 kwds.update(kwds_defaults)
--> 912 return _read(filepath_or_buffer, kwds)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:583, in _read(filepath_or_buffer, kwds)
    580     return parser
    582 with parser:
--> 583     return parser.read(nrows)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1704, in TextFileReader.read(self, nrows)
   1697 nrows = validate_integer("nrows", nrows)
   1698 try:
   1699     # error: "ParserBase" has no attribute "read"
   1700     (
   1701         index,
   1702         columns,
   1703         col_dict,
-> 1704     ) = self._engine.read(  # type: ignore[attr-defined]
   1705         nrows
   1706     )
   1707 except Exception:
   1708     self.close()

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:234, in CParserWrapper.read(self, nrows)
    232 try:
    233     if self.low_memory:
--> 234         chunks = self._reader.read_low_memory(nrows)
    235         # destructive to chunks
    236         data = _concatenate_chunks(chunks)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:812, in pandas._libs.parsers.TextReader.read_low_memory()

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:889, in pandas._libs.parsers.TextReader._read_rows()

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:1034, in pandas._libs.parsers.TextReader._convert_column_data()

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:1073, in pandas._libs.parsers.TextReader._convert_tokens()

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/parsers.pyx:1173, in pandas._libs.parsers.TextReader._convert_with_dtype()

TypeError: _from_sequence_of_strings() got an unexpected keyword argument 'true_values'

In the above example, I would expect the CSV data to be loaded into a DataFrame with PyArrow-backed dtypes. This works when the dtype is a string, int, or float. However, when `bool[pyarrow]` is specified, reading fails with the `TypeError` shown above.