Dir fails on dataframes with pathological column names · Issue #25509 · pandas-dev/pandas (original) (raw)
Code Sample, a copy-pastable example if possible
import pandas as pd
df = pd.DataFrame({'\ud83d': []})
_ = dir(df)
UnicodeEncodeError Traceback (most recent call last) in ----> 1 _ = dir(df)
~/miniconda/envs/dev/lib/python3.7/site-packages/pandas/core/accessor.py in __dir__(self) 37 """ 38 rv = set(dir(type(self))) ---> 39 rv = (rv - self._dir_deletions()) | self._dir_additions() 40 return sorted(rv) 41
~/miniconda/envs/dev/lib/python3.7/site-packages/pandas/core/generic.py in _dir_additions(self) 5110 If info_axis is a MultiIndex, it's first level values are used. 5111 """ -> 5112 additions = {c for c in self._info_axis.unique(level=0)[:100] 5113 if isinstance(c, string_types) and isidentifier(c)} 5114 return super(NDFrame, self)._dir_additions().union(additions)
~/miniconda/envs/dev/lib/python3.7/site-packages/pandas/core/indexes/base.py in unique(self, level) 1999 if level is not None: 2000 self._validate_index_level(level) -> 2001 result = super(Index, self).unique() 2002 return self._shallow_copy(result) 2003
~/miniconda/envs/dev/lib/python3.7/site-packages/pandas/core/base.py in unique(self) 1312 else: 1313 from pandas.core.algorithms import unique1d -> 1314 result = unique1d(values) 1315 1316 return result
~/miniconda/envs/dev/lib/python3.7/site-packages/pandas/core/algorithms.py in unique(values) 360 361 table = htable(len(values)) --> 362 uniques = table.unique(values) 363 uniques = _reconstruct_data(uniques, dtype, original) 364 return uniques
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.StringHashTable.unique()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.StringHashTable._unique()
UnicodeEncodeError: 'utf-8' codec can't encode character '\ud83d' in position 0: surrogates not allowed
Problem description
Dir fails on dataframes with pathological column names
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.7.1.final.0
python-bits: 64
OS: Darwin
OS-release: 18.2.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.24.1
pytest: 3.10.1
pip: 18.1
setuptools: 40.6.2
Cython: None
numpy: 1.15.4
scipy: 1.1.0
pyarrow: 0.11.1
xarray: 0.11.3
IPython: 7.2.0
sphinx: 1.8.4
patsy: None
dateutil: 2.7.5
pytz: 2018.7
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: None
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml.etree: None
bs4: None
html5lib: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: 0.2.0
fastparquet: 0.1.6
pandas_gbq: None
pandas_datareader: None
gcsfs: None