BUG: .filter with unicode labels when can't encode · Issue #13101 · pandas-dev/pandas (original) (raw)

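The change in #10506 breaks `DataFrame.filter(regex=...)` if the DataFrame contains unicode column names with non-ASCII characters: each label is passed through `str(x)` before matching, which fails when the label cannot be encoded as ASCII.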

import pandas as pd
df = pd.DataFrame({u'a': [1, 2, 3], u'ä': [4, 5, 6]})
df.filter(regex=u'a')

raises a `UnicodeEncodeError`:

---------------------------------------------------------------------------
UnicodeEncodeError                        Traceback (most recent call last)
<ipython-input-10-9de5a19c260e> in <module>()
----> 1 df.filter(regex=u'a')

C:\Users\...\AppData\Local\Continuum\32bit\Anaconda\envs\test\lib\site-packages\pandas\core\generic.pyc in filter(self, items, like, regex, axis)
   2013             matcher = re.compile(regex)
   2014             return self.select(lambda x: matcher.search(str(x)) is not None,
-> 2015                                axis=axis_name)
   2016         else:
   2017             raise TypeError('Must pass either `items`, `like`, or `regex`')

C:\Users\...\AppData\Local\Continuum\32bit\Anaconda\envs\test\lib\site-packages\pandas\core\generic.pyc in select(self, crit, axis)
   1545         if len(axis_values) > 0:
   1546             new_axis = axis_values[
-> 1547                 np.asarray([bool(crit(label)) for label in axis_values])]
   1548         else:
   1549             new_axis = axis_values

C:\Users\...\AppData\Local\Continuum\32bit\Anaconda\envs\test\lib\site-packages\pandas\core\generic.pyc in <lambda>(x)
   2012         elif regex:
   2013             matcher = re.compile(regex)
-> 2014             return self.select(lambda x: matcher.search(str(x)) is not None,
   2015                                axis=axis_name)
   2016         else:

UnicodeEncodeError: 'ascii' codec can't encode character u'\xe4' in position 0: ordinal not in range(128)