BUG: Copy categorical codes if empty (fixes #18051) (#18436) · pandas-dev/pandas@b45325e

3 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -136,6 +136,7 @@ Categorical
136 136 - Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`)
137 137 - Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`)
138 138 - ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
139 +- Bug in ``Categorical.unique()`` returning a read-only ``codes`` array when the ``Categorical`` had no categories, i.e. it was empty or all of its values were ``NaN`` (:issue:`18051`)
139 140
140 141 Other
141 142 ^^^^^
Original file line number Diff line number Diff line change
@@ -2276,7 +2276,7 @@ def _recode_for_categories(codes, old_categories, new_categories):
2276 2276
2277 2277 if len(old_categories) == 0:
2278 2278 # All null anyway, so just retain the nulls
2279 -return codes
2279 +return codes.copy()
2280 2280 indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories),
2281 2281 new_categories)
2282 2282 new_codes = take_1d(indexer, codes.copy(), fill_value=-1)
Original file line number Diff line number Diff line change
@@ -848,6 +848,12 @@ def test_value_counts_nunique(self):
848 848 result = series.nunique()
849 849 assert result == 11
850 850
851 +# GH 18051
852 +s = pd.Series(pd.Categorical([]))
853 +assert s.nunique() == 0
854 +s = pd.Series(pd.Categorical([np.nan]))
855 +assert s.nunique() == 0
856 +
851 857 def test_unique(self):
852 858
853 859 # 714 also, dtype=float
@@ -873,6 +879,14 @@ def test_unique(self):
873 879 expected = np.array([1, 2, 3, None], dtype=object)
874 880 tm.assert_numpy_array_equal(result, expected)
875 881
882 +# GH 18051
883 +s = pd.Series(pd.Categorical([]))
884 +tm.assert_categorical_equal(s.unique(), pd.Categorical([]),
885 +check_dtype=False)
886 +s = pd.Series(pd.Categorical([np.nan]))
887 +tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]),
888 +check_dtype=False)
889 +
876 890 @pytest.mark.parametrize(
877 891 "tc1, tc2",
878 892 [