HDFStore.append fails when appending a DataFrame whose string column contains only empty strings and min_itemsize < 8 · Issue #12242 · pandas-dev/pandas

In [3]: store = pd.HDFStore('teststore.h5', 'w')

In [4]: chunk = pd.DataFrame({'V1':['a','b','c','d','e'], 'data':np.arange(5)})

In [5]: store.append('df', chunk, min_itemsize={'V1': 4})

In [6]: chunk = pd.DataFrame({'V1':['', ''], 'data': [3, 5]})

In [7]: store.append('df', chunk, min_itemsize={'V1': 4})
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-c9bafa18ead0> in <module>()
----> 1 store.append('df', chunk, min_itemsize={'V1': 4})

/Users/amcpherson/Anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in append(self, key, value, format, append, columns, dropna, **kwargs)
    905         kwargs = self._validate_format(format, kwargs)
    906         self._write_to_group(key, value, append=append, dropna=dropna,
--> 907                              **kwargs)
    908
    909     def append_to_multiple(self, d, value, selector, data_columns=None,

/Users/amcpherson/Anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
   1250
   1251         # write the object
-> 1252         s.write(obj=value, append=append, complib=complib, **kwargs)
   1253
   1254         if s.is_table and index:

/Users/amcpherson/Anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
   3755         self.create_axes(axes=axes, obj=obj, validate=append,
   3756                          min_itemsize=min_itemsize,
-> 3757                          **kwargs)
   3758
   3759         for a in self.axes:

/Users/amcpherson/Anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
   3432                 self.values_axes.append(col)
   3433             except (NotImplementedError, ValueError, TypeError) as e:
-> 3434                 raise e
   3435             except Exception as detail:
   3436                 raise Exception(

ValueError: Trying to store a string with len [8] in [V1] column but
this column has a limit of [4]!
Consider using min_itemsize to preset the sizes on these columns

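The error is raised only when all values in the appended column are empty strings; if the column contains at least one non-empty string, the append succeeds. Note that the reported length of [8] does not correspond to any string in the data (both values are empty). A workaround is to set min_itemsize to 8 or higher.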

In [10]: pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 2.7.11.final.0
python-bits: 64
OS: Darwin
OS-release: 14.5.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_CA.UTF-8

pandas: 0.17.1
nose: 1.3.7
pip: 8.0.2
setuptools: 19.6.2
Cython: 0.23.4
numpy: 1.10.2
scipy: 0.16.1
statsmodels: 0.6.1
IPython: 4.0.3
sphinx: 1.3.5
patsy: 0.4.0
dateutil: 2.4.2
pytz: 2015.7
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.4.6
matplotlib: 1.5.1
openpyxl: 2.3.2
xlrd: 0.9.4
xlwt: 1.0.0
xlsxwriter: 0.8.4
lxml: 3.5.0
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.11
pymysql: None
psycopg2: None
Jinja2: None