issues with overlapping multi index intervals · Issue #27456 · pandas-dev/pandas (original) (raw)

Scenario 1: single-level indexing, which works fine:

import pandas as pd # pandas version 0.25.0, python version: 3.6.6
idx = pd.IntervalIndex.from_arrays([1,3,1,2],
                             [3,4,2,4])
df = pd.DataFrame({'Value':[1,2,3,4]},index=idx) 

which returns:

df = 
          Value
(1,3]   1
(3,4]   2
(1,2]   3
(2,4]   4

query results:

df.loc[1.5] = 
          Value
(1,3]   1
(1,2]   3

Scenario 2: Multi-level indexing:

idx1 = pd.MultiIndex.from_arrays([
    pd.Index(['label1','label1','label2','label2']),
    pd.IntervalIndex.from_arrays([1,3,1,2],
                             [3,4,2,4])
])
idx2 = pd.MultiIndex.from_arrays([
    pd.Index(['label1','label1','label2','label2']),
    pd.IntervalIndex.from_arrays([1,2,1,2],
                             [2,4,2,4])
])
df1 = pd.DataFrame({'Value':[1,2,3,4]},index=idx1) #with overlapping intervals 
df2 = pd.DataFrame({'Value':[1,2,3,4]},index=idx2) #without overlapping intervals

which returns:

df1 = 
                    Value
label1    (1,3]   1
label1    (3,4]   2
label2    (1,2]   3
label2    (2,4]   4
df2 = 
                    Value
label1    (1,2]   1
label1    (2,4]   2
label2    (1,2]   3
label2    (2,4]   4

query method 1: works fine on both df1 and df2 but is slow

df1.Value.loc['label1'].loc[1.5]
1

query method 2: about 10 times faster than query method 1, but it only works with df2 (non-overlapping intervals); with df1 (overlapping intervals) it raises a KeyError:

df2.Value.loc[('label1',1.5)]
1
df1.Value.loc[('label1',1.5)]

KeyError Traceback (most recent call last)
C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2889 try:
-> 2890 return self._engine.get_loc(key)
2891 except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 1.5

During handling of the above exception, another exception occurred:

KeyError Traceback (most recent call last)
in ()
11 display(df)
12 print(df.loc['label1'].loc[1.5])
---> 13 print(df.loc[('label1',1.5)])

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1402 except (KeyError, IndexError, AttributeError):
1403 pass
-> 1404 return self._getitem_tuple(key)
1405 else:
1406 # we by definition only have the 0th axis

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
789 def _getitem_tuple(self, tup):
790 try:
--> 791 return self._getitem_lowerdim(tup)
792 except IndexingError:
793 pass

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
945 return section
946 # This is an elided recursive call to iloc/loc/etc'
--> 947 return getattr(section, self.name)[new_key]
948
949 raise IndexingError("not applicable")

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1402 except (KeyError, IndexError, AttributeError):
1403 pass
-> 1404 return self._getitem_tuple(key)
1405 else:
1406 # we by definition only have the 0th axis

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
789 def _getitem_tuple(self, tup):
790 try:
--> 791 return self._getitem_lowerdim(tup)
792 except IndexingError:
793 pass

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
913 for i, key in enumerate(tup):
914 if is_label_like(key) or isinstance(key, tuple):
--> 915 section = self._getitem_axis(key, axis=i)
916
917 # we have yielded a scalar ?

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1823 # fall thru to straight lookup
1824 self._validate_key(key, axis)
-> 1825 return self._get_label(key, axis=axis)
1826
1827

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
155 raise IndexingError("no slices here, handle elsewhere")
156
--> 157 return self.obj._xs(label, axis=axis)
158
159 def _get_loc(self, key: int, axis: int):

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3728
3729 if axis == 1:
-> 3730 return self[key]
3731
3732 self._consolidate_inplace()

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2973 if self.columns.nlevels > 1:
2974 return self._getitem_multilevel(key)
-> 2975 indexer = self.columns.get_loc(key)
2976 if is_integer(indexer):
2977 indexer = [indexer]

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2890 return self._engine.get_loc(key)
2891 except KeyError:
-> 2892 return self._engine.get_loc(self._maybe_cast_indexer(key))
2893 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2894 if indexer.ndim > 1 or indexer.size > 1:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 1.5