MultiIndexes containing >= 1000000 elements do not work · Issue #1757 · pandas-dev/pandas

With current master, any attempt to index into a Series (or any other pandas object) whose MultiIndex has >= 1000000 (= _SIZE_CUTOFF) elements raises a TypeError. The same lookup works with 999999 elements and fails once the index reaches 1000000:

In [5]: n = 1000000 - 1; pandas.Series(np.arange(n), pandas.MultiIndex.from_arrays((["a"] * n, np.arange(n))))[("a", 5)]
Out[5]: 5

In [6]: n = 1000000; pandas.Series(np.arange(n), pandas.MultiIndex.from_arrays((["a"] * n, np.arange(n))))[("a", 5)]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-a83549b6c664> in <module>()
----> 1 n = 1000000; pandas.Series(np.arange(n), pandas.MultiIndex.from_arrays((["a"] * n, np.arange(n))))[("a", 5)]

/home/njs/src/pandas/pandas/core/series.pyc in __getitem__(self, key)
    453             key = np.asarray(key, dtype=bool)
    454 
--> 455         return self._get_with(key)
    456 
    457     def _get_with(self, key):

/home/njs/src/pandas/pandas/core/series.pyc in _get_with(self, key)
    471             if isinstance(key, tuple):
    472                 try:
--> 473                     return self._get_values_tuple(key)
    474                 except:
    475                     if len(key) == 1:

/home/njs/src/pandas/pandas/core/series.pyc in _get_values_tuple(self, key)
    514 
    515         # If key is contained, would have returned by now
--> 516         indexer, new_index = self.index.get_loc_level(key)
    517         return Series(self.values[indexer], index=new_index, name=self.name)
    518 

/home/njs/src/pandas/pandas/core/index.pyc in get_loc_level(self, key, level)
   2069             if not any(isinstance(k, slice) for k in key):
   2070                 if len(key) == self.nlevels:
-> 2071                     return self._engine.get_loc(key), None
   2072                 else:
   2073                     # partial selection

/home/njs/src/pandas/pandas/lib.so in pandas.lib.IndexEngine.get_loc (pandas/src/tseries.c:107666)()

/home/njs/src/pandas/pandas/lib.so in pandas.lib.IndexEngine.get_loc (pandas/src/tseries.c:107423)()

/home/njs/src/pandas/pandas/lib.so in util.get_value_at (pandas/src/tseries.c:114800)()

TypeError: only integer arrays with one element can be converted to an index