Minor Bug: Boolean masking not working for Index objects · Issue #17131 · pandas-dev/pandas (original) (raw)
To get the boolean mask to work, you must use the `np.array` representation (`mask.values`) instead of the `pd.Index` itself.
In previous releases, the result of `df.index.map(...)` could be passed to `.loc` directly as a boolean mask. Referenced here: #17129 (comment)
df = pd.DataFrame(np.random.random(size=(5,10)), index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
# 0 1 2 3 4 5 6 7 8 9
# alpha_0 0.039744 0.952401 0.926357 0.819101 0.326789 0.167907 0.400881 0.991539 0.014391 0.025816
# alpha_1 0.794766 0.089820 0.806102 0.196176 0.988310 0.241682 0.860940 0.454829 0.944760 0.306390
# alpha_2 0.976126 0.801022 0.499439 0.895672 0.754903 0.120859 0.013098 0.074128 0.900939 0.068916
# beta_0 0.674020 0.576319 0.471810 0.584221 0.071395 0.781328 0.933798 0.644589 0.233111 0.057664
# beta_1 0.657491 0.436408 0.867786 0.578557 0.080468 0.957060 0.646107 0.434932 0.807671 0.088512
mask_alpha = df.index.map(lambda x:"alpha" in x)
THIS DOESN'T WORK
df.loc[mask_alpha,:]
# ---------------------------------------------------------------------------
# KeyError Traceback (most recent call last)
# <ipython-input-153-be7286f5b810> in <module>()
# 1 mask_alpha = df.index.map(lambda x:"alpha" in x)
# ----> 2 df.loc[mask_alpha,:]
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
# 1323 except (KeyError, IndexError):
# 1324 pass
# -> 1325 return self._getitem_tuple(key)
# 1326 else:
# 1327 key = com._apply_if_callable(key, self.obj)
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
# 839
# 840 # no multi-index, so validate all of the indexers
# --> 841 self._has_valid_tuple(tup)
# 842
# 843 # ugly hack for GH #836
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_tuple(self, key)
# 187 if i >= self.obj.ndim:
# 188 raise IndexingError('Too many indexers')
# --> 189 if not self._has_valid_type(k, i):
# 190 raise ValueError("Location based indexing can only have [%s] "
# 191 "types" % self._valid_types)
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis)
# 1416
# 1417 raise KeyError("None of [%s] are in the [%s]" %
# -> 1418 (key, self.obj._get_axis_name(axis)))
# 1419
# 1420 return True
# KeyError: "None of [Index([True, True, True, False, False], dtype='object')] are in the [index]"
BUT THIS WORKS
df.loc[mask_alpha.values,:]
# 0 1 2 3 4 5 6 7 8 9
# alpha_0 0.522739 0.792373 0.265758 0.771431 0.122409 0.926443 0.769170 0.049860 0.913113 0.080249
# alpha_1 0.353297 0.916381 0.218976 0.911158 0.016677 0.216542 0.690162 0.411131 0.496376 0.703492
# alpha_2 0.637349 0.110855 0.310767 0.979740 0.842421 0.592446 0.223231 0.351051 0.478721 0.488652