ENH: concat and append now can handle unordered categories by sinhrks · Pull Request #13767 · pandas-dev/pandas (original) (raw)

# different categoricals
pd.concat([pd.Series([1, 2], dtype='category'),
           pd.Series([3, 4], dtype='category')])
# ValueError: incompatible categories in categorical concat

# categorical + normal (values are contained in categories) -> object dtype
pd.concat([pd.Series([1, 2], dtype='category'),
           pd.Series([1, 2])])
#0    1
#1    2
#0    1
#1    2
# dtype: object

# categorical + normal (values are not contained in categories) -> object dtype
pd.concat([pd.Series([1, 2], dtype='category'),
           pd.Series([3, 4])])
#0    1
#1    2
#0    3
#1    4
# dtype: object
# different categoricals -> int dtype (the values' original dtype) instead of raising ValueError
pd.concat([pd.Series([1, 2], dtype='category'),
           pd.Series([3, 4], dtype='category')])
#0    1
#1    2
#0    3
#1    4
# dtype: int64

# specifying union_categoricals keeps category if possible
pd.concat([pd.Series([1, 2], dtype='category'),
           pd.Series([3, 4], dtype='category')], union_categoricals=True)
#0    1
#1    2
#0    3
#1    4
# dtype: category
# Categories (4, int64): [1, 2, 3, 4]

# categorical + normal (values are contained in categories) -> int dtype (keep original dtype)
pd.concat([pd.Series([1, 2], dtype='category'),
           pd.Series([1, 2])])
#0    1
#1    2
#0    1
#1    2
# dtype: int64

# categorical + normal (values are not contained in categories) -> int dtype (keep original dtype)
pd.concat([pd.Series([1, 2], dtype='category'),
           pd.Series([3, 4])])
#0    1
#1    2
#0    3
#1    4
# dtype: int64