BUG: Copy categorical codes if empty (fixes #18051) (#18436) · pandas-dev/pandas@b45325e (original) (raw)
3 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -136,6 +136,7 @@ Categorical | ||
136 | 136 | - Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`) |
137 | 137 | - Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) |
138 | 138 | - ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) |
139 | +- Bug in ``Categorical.unique()`` returning a read-only ``codes`` array when the ``Categorical`` had no categories, i.e. it was empty or all of its values were ``NaN`` (:issue:`18051`) | |
139 | 140 | |
140 | 141 | Other |
141 | 142 | ^^^^^ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -2276,7 +2276,7 @@ def _recode_for_categories(codes, old_categories, new_categories): | ||
2276 | 2276 | |
2277 | 2277 | if len(old_categories) == 0: |
2278 | 2278 | # All null anyway, so just retain the nulls |
2279 | -return codes | |
2279 | +return codes.copy() | |
2280 | 2280 | indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories), |
2281 | 2281 | new_categories) |
2282 | 2282 | new_codes = take_1d(indexer, codes.copy(), fill_value=-1) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -848,6 +848,12 @@ def test_value_counts_nunique(self): | ||
848 | 848 | result = series.nunique() |
849 | 849 | assert result == 11 |
850 | 850 | |
851 | +# GH 18051 | |
852 | +s = pd.Series(pd.Categorical([])) | |
853 | +assert s.nunique() == 0 | |
854 | +s = pd.Series(pd.Categorical([np.nan])) | |
855 | +assert s.nunique() == 0 | |
856 | + | |
851 | 857 | def test_unique(self): |
852 | 858 | |
853 | 859 | # 714 also, dtype=float |
@@ -873,6 +879,14 @@ def test_unique(self): | ||
873 | 879 | expected = np.array([1, 2, 3, None], dtype=object) |
874 | 880 | tm.assert_numpy_array_equal(result, expected) |
875 | 881 | |
882 | +# GH 18051 | |
883 | +s = pd.Series(pd.Categorical([])) | |
884 | +tm.assert_categorical_equal(s.unique(), pd.Categorical([]), | |
885 | +check_dtype=False) | |
886 | +s = pd.Series(pd.Categorical([np.nan])) | |
887 | +tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]), | |
888 | +check_dtype=False) | |
889 | + | |
876 | 890 | @pytest.mark.parametrize( |
877 | 891 | "tc1, tc2", |
878 | 892 | [ |