ENH: add to/from_parquet with pyarrow & fastparquet by jreback · Pull Request #15838 · pandas-dev/pandas (original) (raw)
Yeah, I was trying to have helpful errors. OK, I think both `pyarrow` and `fastparquet` should fail gracefully here, then. Currently neither does for a frame with duplicate column names:
In [1]: df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list('aaa'))
In [3]: df.to_parquet('foo', 'pyarrow')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/Users/jreback/pandas/pandas/core/common.py in _asarray_tuplesafe(values, dtype)
398 result = np.empty(len(values), dtype=object)
--> 399 result[:] = values
400 except ValueError:
ValueError: could not broadcast input array from shape (4,3) into shape (4)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-3-185ceaef9fe4> in <module>()
----> 1 df.to_parquet('foo', 'pyarrow')
/Users/jreback/pandas/pandas/core/frame.py in to_parquet(self, fname, engine, compression)
1538 """
1539 from pandas.io.parquet import to_parquet
-> 1540 to_parquet(self, fname, engine, compression=compression)
1541
1542 @Substitution(header='Write out column names. If a list of string is given, \
/Users/jreback/pandas/pandas/io/parquet.py in to_parquet(df, path, engine, compression)
97 from pyarrow import parquet as pq
98
---> 99 table = pyarrow.Table.from_pandas(df)
100 pq.write_table(table, path, compression=compression)
ValueError: cannot copy sequence with size 3 to array axis with dimension 4
In [4]: df.to_parquet('foo', 'fastparquet')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-4-6b46c1abdc2f> in <module>()
----> 1 df.to_parquet('foo', 'fastparquet')
/Users/jreback/pandas/pandas/core/frame.py in to_parquet(self, fname, engine, compression)
1538 """
1539 from pandas.io.parquet import to_parquet
-> 1540 to_parquet(self, fname, engine, compression=compression)
1541
1542 @Substitution(header='Write out column names. If a list of string is given, \
/Users/jreback/pandas/pandas/io/parquet.py in to_parquet(df, path, engine, compression)
107 # Use tobytes() instead.
108 with catch_warnings(record=True):
--> 109 fastparquet.write(path, df, compression=compression)
110
111
/Users/jreback/miniconda3/envs/pandas/lib/python3.6/site-packages/fastparquet/writer.py in write(filename, data, row_group_offsets, compression, file_scheme, open_with, mkdirs, has_nulls, write_index, partition_on, fixed_text, append, object_encoding, times)
747 fmd = make_metadata(data, has_nulls=has_nulls, ignore_columns=ignore,
748 fixed_text=fixed_text, object_encoding=object_encoding,
--> 749 times=times)
750
751 if file_scheme == 'simple':
/Users/jreback/miniconda3/envs/pandas/lib/python3.6/site-packages/fastparquet/writer.py in make_metadata(data, has_nulls, ignore_columns, fixed_text, object_encoding, times)
608 object_encoding.get(column, None))
609 fixed = None if fixed_text is None else fixed_text.get(column, None)
--> 610 if str(data[column].dtype) == 'category':
611 se, type = find_type(data[column].cat.categories,
612 fixed_text=fixed, object_encoding=oencoding)
/Users/jreback/pandas/pandas/core/generic.py in __getattr__(self, name)
2888 if name in self._info_axis:
2889 return self[name]
-> 2890 return object.__getattribute__(self, name)
2891
2892 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'dtype'