resample strange behavior on get_item · Issue #13174 · pandas-dev/pandas (original) (raw)
Code Sample, a copy-pastable example if possible
import pandas as pd
data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}]
df = pd.DataFrame(data, index=pd.date_range('2016-01-01', periods=2))
resampler = df.groupby('id').resample('1D')
If I then run resampler['buyer'].count()
first time, it will output normally:
id
1 2016-01-01 1
2 2016-01-02 1
Name: buyer, dtype: int64
If I then run resampler['buyer'].count()
again, it will report error:
/usr/local/bin/ipython:1: FutureWarning: .resample() is now a deferred operation
use .resample(...).mean() instead of .resample(...)
#!/usr/local/opt/python/bin/python2.7
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-30-5714547cf98c> in <module>()
----> 1 resampler['buyer'].count()
/usr/local/lib/python2.7/site-packages/pandas/tseries/resample.pyc in __getitem__(self, key)
179 # compat for deprecated
180 if isinstance(self.obj, com.ABCSeries):
--> 181 return self._deprecated()[key]
182
183 raise
/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
1995 return self._getitem_multilevel(key)
1996 else:
-> 1997 return self._getitem_column(key)
1998
1999 def _getitem_column(self, key):
/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
2002 # get column
2003 if self.columns.is_unique:
-> 2004 return self._get_item_cache(key)
2005
2006 # duplicate columns & possible reduce dimensionality
/usr/local/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
1348 res = cache.get(item)
1349 if res is None:
-> 1350 values = self._data.get(item)
1351 res = self._box_item_values(item, values)
1352 cache[item] = res
/usr/local/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
3288
3289 if not isnull(item):
-> 3290 loc = self.items.get_loc(item)
3291 else:
3292 indexer = np.arange(len(self.items))[isnull(self.items)]
/usr/local/lib/python2.7/site-packages/pandas/indexes/base.pyc in get_loc(self, key, method, tolerance)
1945 return self._engine.get_loc(key)
1946 except KeyError:
-> 1947 return self._engine.get_loc(self._maybe_cast_indexer(key))
1948
1949 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4154)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4018)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12368)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12322)()
KeyError: 'buyer'
If I run df.groupby('id').resample('1D')['buyer'].count()
instead of resampler['buyer'].count()
, the problem will not appear.
What's the problem?
output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 2.7.11.final.0
python-bits: 64
OS: Darwin
OS-release: 15.4.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: zh_CN.UTF-8
pandas: 0.18.1
nose: None
pip: 8.1.1
setuptools: 19.4
Cython: None
numpy: 1.11.0
scipy: None
statsmodels: None
xarray: None
IPython: 4.2.0
sphinx: 1.3.5
patsy: None
dateutil: 2.5.3
pytz: 2016.4
blosc: None
bottleneck: None
tables: None
numexpr: None
matplotlib: None
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.12
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None
pandas_datareader: None