Consistent CategoricalDtype use in Categorical init · pandas-dev/pandas@416d1d7 (original) (raw)
`@@ -234,6 +234,21 @@ class Categorical(PandasObject):
`
234
234
`def init(self, values, categories=None, ordered=None, dtype=None,
`
235
235
`fastpath=False):
`
236
236
``
``
237
`+
Ways of specifying the dtype (in priority order)
`
``
238
`+
1. dtype is a CategoricalDtype
`
``
239
`+
a.) with known categories, use dtype.categories
`
``
240
`+
b.) else with Categorical values, use values.dtype
`
``
241
`+
c.) else, infer from values
`
``
242
`+
d.) specifying dtype=CategoricalDtype and categories is an error
`
``
243
`+
2. dtype is a string 'category'
`
``
244
`+
a.) use categories, ordered
`
``
245
`+
b.) use values.dtype
`
``
246
`+
c.) infer from values
`
``
247
`+
3. dtype is None
`
``
248
`+
a.) use categories, ordered
`
``
249
`+
b.) use values.dtype
`
``
250
`+
c.) infer from values
`
``
251
+
237
252
`if dtype is not None:
`
238
253
`if isinstance(dtype, compat.string_types):
`
239
254
`if dtype == 'category':
`
`@@ -247,20 +262,24 @@ def init(self, values, categories=None, ordered=None, dtype=None,
`
247
262
`categories = dtype.categories
`
248
263
`ordered = dtype.ordered
`
249
264
``
250
``
`-
if ordered is None:
`
251
``
`-
ordered = False
`
``
265
`+
elif is_categorical(values):
`
``
266
`+
dtype = values.dtype._from_categorical_dtype(values.dtype,
`
``
267
`+
categories, ordered)
`
``
268
`+
else:
`
``
269
`+
dtype = CategoricalDtype(categories, ordered)
`
``
270
+
``
271
`+
At this point, dtype is always a CategoricalDtype
`
``
272
`+
if dtype.categories is None, we are inferring
`
252
273
``
253
274
`if fastpath:
`
254
``
`-
if dtype is None:
`
255
``
`-
dtype = CategoricalDtype(categories, ordered)
`
256
275
`self._codes = coerce_indexer_dtype(values, categories)
`
257
276
`self._dtype = dtype
`
258
277
`return
`
259
278
``
260
279
`# sanitize input
`
261
280
`if is_categorical_dtype(values):
`
262
281
``
263
``
`-
we are either a Series, CategoricalIndex
`
``
282
`+
we are either a Series or a CategoricalIndex
`
264
283
`if isinstance(values, (ABCSeries, ABCCategoricalIndex)):
`
265
284
`values = values._values
`
266
285
``
`@@ -271,6 +290,7 @@ def init(self, values, categories=None, ordered=None, dtype=None,
`
271
290
`values = values.get_values()
`
272
291
``
273
292
`elif isinstance(values, (ABCIndexClass, ABCSeries)):
`
``
293
`+
we'll do inference later
`
274
294
`pass
`
275
295
``
276
296
`else:
`
`@@ -288,12 +308,12 @@ def init(self, values, categories=None, ordered=None, dtype=None,
`
288
308
`# "object" dtype to prevent this. In the end objects will be
`
289
309
`# cast to int/... in the category assignment step.
`
290
310
`if len(values) == 0 or isna(values).any():
`
291
``
`-
dtype = 'object'
`
``
311
`+
sanitize_dtype = 'object'
`
292
312
`else:
`
293
``
`-
dtype = None
`
294
``
`-
values = _sanitize_array(values, None, dtype=dtype)
`
``
313
`+
sanitize_dtype = None
`
``
314
`+
values = _sanitize_array(values, None, dtype=sanitize_dtype)
`
295
315
``
296
``
`-
if categories is None:
`
``
316
`+
if dtype.categories is None:
`
297
317
`try:
`
298
318
`codes, categories = factorize(values, sort=True)
`
299
319
`except TypeError:
`
`@@ -310,7 +330,8 @@ def init(self, values, categories=None, ordered=None, dtype=None,
`
310
330
`raise NotImplementedError("> 1 ndim Categorical are not "
`
311
331
`"supported at this time")
`
312
332
``
313
``
`-
if dtype is None or isinstance(dtype, str):
`
``
333
`+
if dtype.categories is None:
`
``
334
`+
we're inferring from values
`
314
335
`dtype = CategoricalDtype(categories, ordered)
`
315
336
``
316
337
`else:
`
`@@ -321,11 +342,6 @@ def init(self, values, categories=None, ordered=None, dtype=None,
`
321
342
`# - the new one, where each value is also in the categories array
`
322
343
`# (or np.nan)
`
323
344
``
324
``
`-
make sure that we always have the same type here, no matter what
`
325
``
`-
we get passed in
`
326
``
`-
if dtype is None or isinstance(dtype, str):
`
327
``
`-
dtype = CategoricalDtype(categories, ordered)
`
328
``
-
329
345
`codes = _get_codes_for_values(values, dtype.categories)
`
330
346
``
331
347
`# TODO: check for old style usage. These warnings should be removed
`