ENH: concat and append now can handle unordered categories by sinhrks · Pull Request #13767 · pandas-dev/pandas (original) (raw)
# different categoricals -> ValueError
pd.concat([pd.Series([1, 2], dtype='category'),
pd.Series([3, 4], dtype='category')])
# ValueError: incompatible categories in categorical concat
# categorical + normal (values are contained in categories) -> object dtype
pd.concat([pd.Series([1, 2], dtype='category'),
pd.Series([1, 2])])
#0 1
#1 2
#0 1
#1 2
# dtype: object
# categorical + normal (values are not contained in categories) -> object dtype
pd.concat([pd.Series([1, 2], dtype='category'),
pd.Series([3, 4])])
#0 1
#1 2
#0 3
#1 4
# dtype: object
# different categoricals -> int64 dtype
pd.concat([pd.Series([1, 2], dtype='category'),
pd.Series([3, 4], dtype='category')])
#0 1
#1 2
#0 3
#1 4
# dtype: int64
# specifying union_categoricals keeps category if possible
pd.concat([pd.Series([1, 2], dtype='category'),
pd.Series([3, 4], dtype='category')], union_categoricals=True)
#0 1
#1 2
#0 3
#1 4
# dtype: category
# Categories (4, int64): [1, 2, 3, 4]
# categorical + normal (values are contained in categories) -> category dtype
pd.concat([pd.Series([1, 2], dtype='category'),
pd.Series([1, 2])])
#0 1
#1 2
#0 1
#1 2
# dtype: int64
# categorical + normal (values are not contained in categories) -> int dtype (keep original dtype)
pd.concat([pd.Series([1, 2], dtype='category'),
pd.Series([3, 4])])
#0 1
#1 2
#0 3
#1 4
# dtype: int64