Minor Bug: Boolean masking not working for Index objects

To get the boolean mask to work, you must pass its np.array representation (`mask.values`) instead of the pd.Index itself. In previous releases, the pd.Index could be passed directly as the mask. Referenced here: #17129 (comment)

df = pd.DataFrame(np.random.random(size=(5,10)), index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])

# 0	1	2	3	4	5	6	7	8	9
# alpha_0	0.039744	0.952401	0.926357	0.819101	0.326789	0.167907	0.400881	0.991539	0.014391	0.025816
# alpha_1	0.794766	0.089820	0.806102	0.196176	0.988310	0.241682	0.860940	0.454829	0.944760	0.306390
# alpha_2	0.976126	0.801022	0.499439	0.895672	0.754903	0.120859	0.013098	0.074128	0.900939	0.068916
# beta_0	0.674020	0.576319	0.471810	0.584221	0.071395	0.781328	0.933798	0.644589	0.233111	0.057664
# beta_1	0.657491	0.436408	0.867786	0.578557	0.080468	0.957060	0.646107	0.434932	0.807671	0.088512

mask_alpha = df.index.map(lambda x:"alpha" in x)

THIS DOESN'T WORK

df.loc[mask_alpha,:]

# ---------------------------------------------------------------------------
# KeyError                                  Traceback (most recent call last)
# <ipython-input-153-be7286f5b810> in <module>()
#       1 mask_alpha = df.index.map(lambda x:"alpha" in x)
# ----> 2 df.loc[mask_alpha,:]

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
#    1323             except (KeyError, IndexError):
#    1324                 pass
# -> 1325             return self._getitem_tuple(key)
#    1326         else:
#    1327             key = com._apply_if_callable(key, self.obj)

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
#     839 
#     840         # no multi-index, so validate all of the indexers
# --> 841         self._has_valid_tuple(tup)
#     842 
#     843         # ugly hack for GH #836

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_tuple(self, key)
#     187             if i >= self.obj.ndim:
#     188                 raise IndexingError('Too many indexers')
# --> 189             if not self._has_valid_type(k, i):
#     190                 raise ValueError("Location based indexing can only have [%s] "
#     191                                  "types" % self._valid_types)

# ~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis)
#    1416 
#    1417                 raise KeyError("None of [%s] are in the [%s]" %
# -> 1418                                (key, self.obj._get_axis_name(axis)))
#    1419 
#    1420             return True

# KeyError: "None of [Index([True, True, True, False, False], dtype='object')] are in the [index]"

BUT THIS WORKS

df.loc[mask_alpha.values,:]

# 0	1	2	3	4	5	6	7	8	9
# alpha_0	0.522739	0.792373	0.265758	0.771431	0.122409	0.926443	0.769170	0.049860	0.913113	0.080249
# alpha_1	0.353297	0.916381	0.218976	0.911158	0.016677	0.216542	0.690162	0.411131	0.496376	0.703492
# alpha_2	0.637349	0.110855	0.310767	0.979740	0.842421	0.592446	0.223231	0.351051	0.478721	0.488652