BUG: .filter with unicode labels when can't encode · Issue #13101 · pandas-dev/pandas (original) (raw)
The change made in #10506 breaks `DataFrame.filter(regex=...)` if the DataFrame contains unicode column names with non-ASCII characters. For example:
import pandas as pd
df = pd.DataFrame({u'a': [1, 2, 3], u'ä': [4, 5, 6]})
df.filter(regex=u'a')
raises a `UnicodeEncodeError`:
---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-10-9de5a19c260e> in <module>()
----> 1 df.filter(regex=u'a')
C:\Users\...\AppData\Local\Continuum\32bit\Anaconda\envs\test\lib\site-packages\pandas\core\generic.pyc in filter(self, items, like, regex, axis)
2013 matcher = re.compile(regex)
2014 return self.select(lambda x: matcher.search(str(x)) is not None,
-> 2015 axis=axis_name)
2016 else:
2017 raise TypeError('Must pass either `items`, `like`, or `regex`')
C:\Users\...\AppData\Local\Continuum\32bit\Anaconda\envs\test\lib\site-packages\pandas\core\generic.pyc in select(self, crit, axis)
1545 if len(axis_values) > 0:
1546 new_axis = axis_values[
-> 1547 np.asarray([bool(crit(label)) for label in axis_values])]
1548 else:
1549 new_axis = axis_values
C:\Users\...\AppData\Local\Continuum\32bit\Anaconda\envs\test\lib\site-packages\pandas\core\generic.pyc in <lambda>(x)
2012 elif regex:
2013 matcher = re.compile(regex)
-> 2014 return self.select(lambda x: matcher.search(str(x)) is not None,
2015 axis=axis_name)
2016 else:
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe4' in position 0: ordinal not in range(128)