MultiIndexes containing >=1000000 elements do not work · Issue #1757 · pandas-dev/pandas
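With current master, any attempt to index into a Series (or any other MultiIndex-backed object) whose MultiIndex has >=1000000 (= _SIZE_CUTOFF) elements simply raises an error. The lookup succeeds with one element fewer than the cutoff, but fails with a TypeError at exactly 1000000 elements: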
In [5]: n = 1000000 - 1; pandas.Series(np.arange(n), pandas.MultiIndex.from_arrays((["a"] * n, np.arange(n))))[("a", 5)]
Out[5]: 5
In [6]: n = 1000000; pandas.Series(np.arange(n), pandas.MultiIndex.from_arrays((["a"] * n, np.arange(n))))[("a", 5)]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-a83549b6c664> in <module>()
----> 1 n = 1000000; pandas.Series(np.arange(n), pandas.MultiIndex.from_arrays((["a"] * n, np.arange(n))))[("a", 5)]
/home/njs/src/pandas/pandas/core/series.pyc in __getitem__(self, key)
453 key = np.asarray(key, dtype=bool)
454
--> 455 return self._get_with(key)
456
457 def _get_with(self, key):
/home/njs/src/pandas/pandas/core/series.pyc in _get_with(self, key)
471 if isinstance(key, tuple):
472 try:
--> 473 return self._get_values_tuple(key)
474 except:
475 if len(key) == 1:
/home/njs/src/pandas/pandas/core/series.pyc in _get_values_tuple(self, key)
514
515 # If key is contained, would have returned by now
--> 516 indexer, new_index = self.index.get_loc_level(key)
517 return Series(self.values[indexer], index=new_index, name=self.name)
518
/home/njs/src/pandas/pandas/core/index.pyc in get_loc_level(self, key, level)
2069 if not any(isinstance(k, slice) for k in key):
2070 if len(key) == self.nlevels:
-> 2071 return self._engine.get_loc(key), None
2072 else:
2073 # partial selection
/home/njs/src/pandas/pandas/lib.so in pandas.lib.IndexEngine.get_loc (pandas/src/tseries.c:107666)()
/home/njs/src/pandas/pandas/lib.so in pandas.lib.IndexEngine.get_loc (pandas/src/tseries.c:107423)()
/home/njs/src/pandas/pandas/lib.so in util.get_value_at (pandas/src/tseries.c:114800)()
TypeError: only integer arrays with one element can be converted to an index