Minor Bug: Boolean masking not working for Index objects (original) (raw)
To get the boolean mask to work with `df.loc`, you must pass the np.array
representation of the mask (`mask.values`) instead of the pd.Index itself.
In previous releases, passing the pd.Index directly worked. Referenced here: #17129 (comment)
df = pd.DataFrame(np.random.random(size=(5,10)), index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
# 0 1 2 3 4 5 6 7 8 9
# alpha_0 0.039744 0.952401 0.926357 0.819101 0.326789 0.167907 0.400881 0.991539 0.014391 0.025816
# alpha_1 0.794766 0.089820 0.806102 0.196176 0.988310 0.241682 0.860940 0.454829 0.944760 0.306390
# alpha_2 0.976126 0.801022 0.499439 0.895672 0.754903 0.120859 0.013098 0.074128 0.900939 0.068916
# beta_0 0.674020 0.576319 0.471810 0.584221 0.071395 0.781328 0.933798 0.644589 0.233111 0.057664
# beta_1 0.657491 0.436408 0.867786 0.578557 0.080468 0.957060 0.646107 0.434932 0.807671 0.088512
mask_alpha = df.index.map(lambda x:"alpha" in x)
THIS DOESN'T WORK
df.loc[mask_alpha,:]
# ---------------------------------------------------------------------------
# KeyError Traceback (most recent call last)
# <ipython-input-153-be7286f5b810> in <module>()
# 1 mask_alpha = df.index.map(lambda x:"alpha" in x)
# ----> 2 df.loc[mask_alpha,:]
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
# 1323 except (KeyError, IndexError):
# 1324 pass
# -> 1325 return self._getitem_tuple(key)
# 1326 else:
# 1327 key = com._apply_if_callable(key, self.obj)
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
# 839
# 840 # no multi-index, so validate all of the indexers
# --> 841 self._has_valid_tuple(tup)
# 842
# 843 # ugly hack for GH #836
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_tuple(self, key)
# 187 if i >= self.obj.ndim:
# 188 raise IndexingError('Too many indexers')
# --> 189 if not self._has_valid_type(k, i):
# 190 raise ValueError("Location based indexing can only have [%s] "
# 191 "types" % self._valid_types)
# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis)
# 1416
# 1417 raise KeyError("None of [%s] are in the [%s]" %
# -> 1418 (key, self.obj._get_axis_name(axis)))
# 1419
# 1420 return True
# KeyError: "None of [Index([True, True, True, False, False], dtype='object')] are in the [index]"
BUT THIS WORKS
df.loc[mask_alpha.values,:]
# 0 1 2 3 4 5 6 7 8 9
# alpha_0 0.522739 0.792373 0.265758 0.771431 0.122409 0.926443 0.769170 0.049860 0.913113 0.080249
# alpha_1 0.353297 0.916381 0.218976 0.911158 0.016677 0.216542 0.690162 0.411131 0.496376 0.703492
# alpha_2 0.637349 0.110855 0.310767 0.979740 0.842421 0.592446 0.223231 0.351051 0.478721 0.488652