read_csv with iterator=True and engine="python" raises ValueError at `if self.usecols` · Issue #12546 · pandas-dev/pandas

Code Sample, a copy-pastable example if possible

The following example raises a ValueError when run with engine="python" (note that usecols is passed as a NumPy array):

  ite = pd.read_csv(infile, sep = '\t', index_col = False, 
              # dtype=pd.np.float32, na_filter=False,low_memory=False,
                usecols = pd.np.arange(nindexcolumns, ncol), 
                iterator = True,   chunksize = chunksize  , engine = "python" )
 print(list(ite)[0])

Error

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-e39439e21b50> in <module>()
     17                     iterator = True,   chunksize = chunksize  , engine = "python" )
     18 
---> 19 print(list(ite)[0])

/home/.usr/py3/lib/pandas-0.18.0rc1+80.g820e110.dirty-py3.3-linux-x86_64.egg/pandas/io/parsers.py in __next__(self)
    739 
    740     def __next__(self):
--> 741         return self.get_chunk()
    742 
    743     def _make_engine(self, engine='c'):

/home/.usr/py3/lib/pandas-0.18.0rc1+80.g820e110.dirty-py3.3-linux-x86_64.egg/pandas/io/parsers.py in get_chunk(self, size)
    780         if size is None:
    781             size = self.chunksize
--> 782         return self.read(nrows=size)
    783 
    784 

/home/.usr/py3/lib/pandas-0.18.0rc1+80.g820e110.dirty-py3.3-linux-x86_64.egg/pandas/io/parsers.py in read(self, nrows)
    759                 raise ValueError('skip_footer not supported for iteration')
    760 
--> 761         ret = self._engine.read(nrows)
    762 
    763         if self.options.get('as_recarray'):

/home/.usr/py3/lib/pandas-0.18.0rc1+80.g820e110.dirty-py3.3-linux-x86_64.egg/pandas/io/parsers.py in read(self, rows)
   1617             content = content[1:]
   1618 
-> 1619         alldata = self._rows_to_cols(content)
   1620         data = self._exclude_implicit_index(alldata)
   1621 

/home/.usr/py3/lib/pandas-0.18.0rc1+80.g820e110.dirty-py3.3-linux-x86_64.egg/pandas/io/parsers.py in _rows_to_cols(self, content)
   1997             raise ValueError(msg)
   1998 
-> 1999         if self.usecols:
   2000             if self._implicit_index:
   2001                 zipped_content = [

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

Output of `pd.show_versions()`

INSTALLED VERSIONS

commit: None
python: 3.3.2.final.0
python-bits: 64
OS: Linux
OS-release: 2.6.32-573.12.1.el6.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8

pandas: 0.18.0rc1+80.g820e110.dirty
nose: 1.3.0
pip: 8.0.2
setuptools: 0.9.8
Cython: 0.23.4
numpy: 1.10.4
scipy: 0.12.1
statsmodels: None
xarray: None
IPython: 4.1.0-dev
sphinx: None
patsy: None
dateutil: 2.4.2
pytz: 2015.7
blosc: None
bottleneck: None
tables: 3.2.2
numexpr: 2.4.7.dev0
matplotlib: 1.5.1
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None