Regression in series.map? · Issue #8024 · pandas-dev/pandas (original) (raw)
import pandas
from statsmodels import datasets
# Reproduction script: load the statsmodels "fair" dataset, derive a
# 'cheated' column, then relabel three columns via Series.map with a dict.
# Kept verbatim because the traceback in this report quotes the final line.
# load the data and clean it a bit
affairs = datasets.fair.load_pandas()
datas = affairs.exog
# any time greater than 0 is cheating
# (result of the `> 0` comparison; presumably a boolean column, which is
# what the False/True keys of num_to_cheat below are meant to match)
datas['cheated'] = affairs.endog > 0
# sort by the marriage quality and give meaningful name
# [rate_marriage, age, yrs_married, children,
# religious, educ, occupation, occupation_husb]
datas = datas.sort(['rate_marriage', 'religious'])
# numeric marriage rating -> descriptive label
num_to_desc = {1: 'awful', 2: 'bad', 3: 'intermediate',
4: 'good', 5: 'wonderful'}
datas['rate_marriage'] = datas['rate_marriage'].map(num_to_desc)
# numeric religiosity level -> descriptive label
num_to_faith = {1: 'non religious', 2: 'poorly religious', 3: 'religious',
4: 'very religious'}
datas['religious'] = datas['religious'].map(num_to_faith)
# False/True -> descriptive label
num_to_cheat = {False: 'faithful', True: 'cheated'}
# This is the statement that raises the ValueError shown in the traceback.
datas['cheated'] = datas['cheated'].map(num_to_cheat)
This is part of the following test, which fails on pythonxy Ubuntu testing:
ERROR: statsmodels.graphics.tests.test_mosaicplot.test_mosaic
Traceback (most recent call last):
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
self.test(*self.arg)
File "/usr/lib/python2.7/dist-packages/numpy/testing/decorators.py",
line 146, in skipper_func
return f(*args, **kwargs)
File "/build/buildd/statsmodels-0.6.0ppa18revno/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/graphics/tests/test_mosaicplot.py",
line 124, in test_mosaic
datas['cheated'] = datas['cheated'].map(num_to_cheat)
File "/usr/lib/pymodules/python2.7/pandas/core/series.py", line 1960, in map
indexer = arg.index.get_indexer(values)
File "/usr/lib/pymodules/python2.7/pandas/core/index.py", line 1460,
in get_indexer
if not self.is_unique:
File "properties.pyx", line 34, in pandas.lib.cache_readonly.get
(pandas/lib.c:38722)
File "/usr/lib/pymodules/python2.7/pandas/core/index.py", line 571,
in is_unique
return self._engine.is_unique
File "index.pyx", line 205, in
pandas.index.IndexEngine.is_unique.get (pandas/index.c:4338)
File "index.pyx", line 234, in
pandas.index.IndexEngine._do_unique_check (pandas/index.c:4790)
File "index.pyx", line 247, in
pandas.index.IndexEngine._ensure_mapping_populated
(pandas/index.c:4995)
File "index.pyx", line 253, in pandas.index.IndexEngine.initialize
(pandas/index.c:5092)
File "hashtable.pyx", line 731, in
pandas.hashtable.PyObjectHashTable.map_locations
(pandas/hashtable.c:12440)
ValueError: Does not understand character buffer dtype format string ('?')
This works on '0.13.1' but not on '0.14.1-202-g7d702e9'