Minor Bug: Boolean masking not working for Index objects · Issue #17131 · pandas-dev/pandas

To get the boolean mask to work with `.loc`, you must pass its np.array representation (`mask.values`) instead of the pd.Index itself. In previous releases, a boolean pd.Index could be used directly as the mask. Referenced here: #17129 (comment)

df = pd.DataFrame(np.random.random(size=(5,10)), index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])

# 0	1	2	3	4	5	6	7	8	9
# alpha_0	0.039744	0.952401	0.926357	0.819101	0.326789	0.167907	0.400881	0.991539	0.014391	0.025816
# alpha_1	0.794766	0.089820	0.806102	0.196176	0.988310	0.241682	0.860940	0.454829	0.944760	0.306390
# alpha_2	0.976126	0.801022	0.499439	0.895672	0.754903	0.120859	0.013098	0.074128	0.900939	0.068916
# beta_0	0.674020	0.576319	0.471810	0.584221	0.071395	0.781328	0.933798	0.644589	0.233111	0.057664
# beta_1	0.657491	0.436408	0.867786	0.578557	0.080468	0.957060	0.646107	0.434932	0.807671	0.088512

mask_alpha = df.index.map(lambda x:"alpha" in x)

THIS DOESN'T WORK

df.loc[mask_alpha,:]

# ---------------------------------------------------------------------------
# KeyError                                  Traceback (most recent call last)
# <ipython-input-153-be7286f5b810> in <module>()
#       1 mask_alpha = df.index.map(lambda x:"alpha" in x)
# ----> 2 df.loc[mask_alpha,:]

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
#    1323             except (KeyError, IndexError):
#    1324                 pass
# -> 1325             return self._getitem_tuple(key)
#    1326         else:
#    1327             key = com._apply_if_callable(key, self.obj)

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
#     839 
#     840         # no multi-index, so validate all of the indexers
# --> 841         self._has_valid_tuple(tup)
#     842 
#     843         # ugly hack for GH #836

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_tuple(self, key)
#     187             if i >= self.obj.ndim:
#     188                 raise IndexingError('Too many indexers')
# --> 189             if not self._has_valid_type(k, i):
#     190                 raise ValueError("Location based indexing can only have [%s] "
#     191                                  "types" % self._valid_types)

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis)
#    1416 
#    1417                 raise KeyError("None of [%s] are in the [%s]" %
# -> 1418                                (key, self.obj._get_axis_name(axis)))
#    1419 
#    1420             return True

# KeyError: "None of [Index([True, True, True, False, False], dtype='object')] are in the [index]"

BUT THIS WORKS

df.loc[mask_alpha.values,:]

# 0	1	2	3	4	5	6	7	8	9
# alpha_0	0.522739	0.792373	0.265758	0.771431	0.122409	0.926443	0.769170	0.049860	0.913113	0.080249
# alpha_1	0.353297	0.916381	0.218976	0.911158	0.016677	0.216542	0.690162	0.411131	0.496376	0.703492
# alpha_2	0.637349	0.110855	0.310767	0.979740	0.842421	0.592446	0.223231	0.351051	0.478721	0.488652