BUG: read_csv skipfooter fails with invalid quoted line · Issue #15910 · pandas-dev/pandas (original) (raw)

from pandas.compat import StringIO

pd.read_csv(StringIO('''Date,Value
1/1/2012,100.00
1/2/2012,102.00
"a quoted junk row"morejunk'''), skipfooter=1)

Out[21] ERROR:root:An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line string', (1, 20))


Error Traceback (most recent call last) in () 2 1/1/2012,100.00 3 1/2/2012,102.00 ----> 4 "a quoted junk row" '''), skipfooter=1)

C:\Users\chris.bartak\Documents\python-dev\pandas\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision) 651 skip_blank_lines=skip_blank_lines) 652 --> 653 return _read(filepath_or_buffer, kwds) 654 655 parser_f.name = name

C:\Users\chris.bartak\Documents\python-dev\pandas\pandas\io\parsers.py in _read(filepath_or_buffer, kwds) 404 405 try: --> 406 data = parser.read() 407 finally: 408 parser.close()

C:\Users\chris.bartak\Documents\python-dev\pandas\pandas\io\parsers.py in read(self, nrows) 977 raise ValueError('skipfooter not supported for iteration') 978 --> 979 ret = self._engine.read(nrows) 980 981 if self.options.get('as_recarray'):

C:\Users\chris.bartak\Documents\python-dev\pandas\pandas\io\parsers.py in read(self, rows) 2066 def read(self, rows=None): 2067 try: -> 2068 content = self._get_lines(rows) 2069 except StopIteration: 2070 if self._first_chunk:

C:\Users\chris.bartak\Documents\python-dev\pandas\pandas\io\parsers.py in _get_lines(self, rows) 2717 while True: 2718 try: -> 2719 new_rows.append(next(source)) 2720 rows += 1 2721 except csv.Error as inst:

Error: ',' expected after '"'

This error only occurs when the last row is quoted and invalid. For example, if you delete `morejunk` from the snippet above, no error is raised.