pandas v0.15.0: can't read a gzip-compressed CSV (csv.gz) from a URL · Issue #8685 · pandas-dev/pandas (original) (raw)

import pandas as pd
pd.read_csv('https://raw.githubusercontent.com/YeoLab/shalek2013/master/expression.csv.gz', compression='gzip', index_col=0)

---------------------------------------------------------------------------
CParserError                              Traceback (most recent call last)
<ipython-input-17-2e5c16b1e504> in <module>()
----> 1 pd.read_csv('https://raw.githubusercontent.com/YeoLab/shalek2013/master/expression.csv.gz', compression='gzip', index_col=0)

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    461                     skip_blank_lines=skip_blank_lines)
    462 
--> 463         return _read(filepath_or_buffer, kwds)
    464 
    465     parser_f.__name__ = name

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    237 
    238     # Create the parser.
--> 239     parser = TextFileReader(filepath_or_buffer, **kwds)
    240 
    241     if (nrows is not None) and (chunksize is not None):

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    551             self.options['has_index_names'] = kwds['has_index_names']
    552 
--> 553         self._make_engine(self.engine)
    554 
    555     def _get_options_with_defaults(self, engine):

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    688     def _make_engine(self, engine='c'):
    689         if engine == 'c':
--> 690             self._engine = CParserWrapper(self.f, **self.options)
    691         else:
    692             if engine == 'python':

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1050         kwds['allow_leading_cols'] = self.index_col is not False
   1051 
-> 1052         self._reader = _parser.TextReader(src, **kwds)
   1053 
   1054         # XXX

/usr/local/lib/python2.7/site-packages/pandas/parser.so in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4693)()

/usr/local/lib/python2.7/site-packages/pandas/parser.so in pandas.parser.TextReader._get_header (pandas/parser.c:6091)()

/usr/local/lib/python2.7/site-packages/pandas/parser.so in pandas.parser.TextReader._tokenize_rows (pandas/parser.c:8119)()

/usr/local/lib/python2.7/site-packages/pandas/parser.so in pandas.parser.raise_parser_error (pandas/parser.c:20349)()

CParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.