resample strange behavior on get_item · Issue #13174 · pandas-dev/pandas (original) (raw)

Code Sample, a copy-pastable example if possible

import pandas as pd
data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}]
df = pd.DataFrame(data, index=pd.date_range('2016-01-01', periods=2))
resampler = df.groupby('id').resample('1D')

If I then run resampler['buyer'].count() first time, it will output normally:

id
1   2016-01-01    1
2   2016-01-02    1
Name: buyer, dtype: int64

If I then run resampler['buyer'].count() again, it will report error:

/usr/local/bin/ipython:1: FutureWarning: .resample() is now a deferred operation
use .resample(...).mean() instead of .resample(...)
  #!/usr/local/opt/python/bin/python2.7
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-30-5714547cf98c> in <module>()
----> 1 resampler['buyer'].count()

/usr/local/lib/python2.7/site-packages/pandas/tseries/resample.pyc in __getitem__(self, key)
    179             # compat for deprecated
    180             if isinstance(self.obj, com.ABCSeries):
--> 181                 return self._deprecated()[key]
    182
    183             raise

/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1995             return self._getitem_multilevel(key)
   1996         else:
-> 1997             return self._getitem_column(key)
   1998
   1999     def _getitem_column(self, key):

/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   2002         # get column
   2003         if self.columns.is_unique:
-> 2004             return self._get_item_cache(key)
   2005
   2006         # duplicate columns & possible reduce dimensionality

/usr/local/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1348         res = cache.get(item)
   1349         if res is None:
-> 1350             values = self._data.get(item)
   1351             res = self._box_item_values(item, values)
   1352             cache[item] = res

/usr/local/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
   3288
   3289             if not isnull(item):
-> 3290                 loc = self.items.get_loc(item)
   3291             else:
   3292                 indexer = np.arange(len(self.items))[isnull(self.items)]

/usr/local/lib/python2.7/site-packages/pandas/indexes/base.pyc in get_loc(self, key, method, tolerance)
   1945                 return self._engine.get_loc(key)
   1946             except KeyError:
-> 1947                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   1948
   1949         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4154)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4018)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12368)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12322)()

KeyError: 'buyer'

If I run df.groupby('id').resample('1D')['buyer'].count() instead of resampler['buyer'].count(), the problem will not appear.

What's the problem?

output of pd.show_versions()

INSTALLED VERSIONS

commit: None
python: 2.7.11.final.0
python-bits: 64
OS: Darwin
OS-release: 15.4.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: zh_CN.UTF-8

pandas: 0.18.1
nose: None
pip: 8.1.1
setuptools: 19.4
Cython: None
numpy: 1.11.0
scipy: None
statsmodels: None
xarray: None
IPython: 4.2.0
sphinx: 1.3.5
patsy: None
dateutil: 2.5.3
pytz: 2016.4
blosc: None
bottleneck: None
tables: None
numexpr: None
matplotlib: None
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.12
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None
pandas_datareader: None