Consistent CategoricalDtype use in Categorical init · pandas-dev/pandas@416d1d7 (original) (raw)

`@@ -234,6 +234,21 @@ class Categorical(PandasObject):

`

234

234

`def __init__(self, values, categories=None, ordered=None, dtype=None,

`

235

235

`fastpath=False):

`

236

236

``

``

237

`+

# Ways of specifying the dtype (in priority order)

`

``

238

`+

# 1. dtype is a CategoricalDtype

`

``

239

`+

#    a.) with known categories, use dtype.categories

`

``

240

`+

#    b.) else with Categorical values, use values.dtype

`

``

241

`+

#    c.) else, infer from values

`

``

242

`+

#    d.) specifying dtype=CategoricalDtype and categories is an error

`

``

243

`+

# 2. dtype is a string 'category'

`

``

244

`+

#    a.) use categories, ordered

`

``

245

`+

#    b.) use values.dtype

`

``

246

`+

#    c.) infer from values

`

``

247

`+

# 3. dtype is None

`

``

248

`+

#    a.) use categories, ordered

`

``

249

`+

#    b.) use values.dtype

`

``

250

`+

#    c.) infer from values

`

``

251

+

237

252

`if dtype is not None:

`

238

253

`if isinstance(dtype, compat.string_types):

`

239

254

`if dtype == 'category':

`

`@@ -247,20 +262,24 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,

`

247

262

`categories = dtype.categories

`

248

263

`ordered = dtype.ordered

`

249

264

``

250

``

`-

if ordered is None:

`

251

``

`-

ordered = False

`

``

265

`+

elif is_categorical(values):

`

``

266

`+

dtype = values.dtype._from_categorical_dtype(values.dtype,

`

``

267

`+

categories, ordered)

`

``

268

`+

else:

`

``

269

`+

dtype = CategoricalDtype(categories, ordered)

`

``

270

+

``

271

`+

# At this point, dtype is always a CategoricalDtype

`

``

272

`+

# if dtype.categories is None, we are inferring

`

252

273

``

253

274

`if fastpath:

`

254

``

`-

if dtype is None:

`

255

``

`-

dtype = CategoricalDtype(categories, ordered)

`

256

275

`self._codes = coerce_indexer_dtype(values, categories)

`

257

276

`self._dtype = dtype

`

258

277

`return

`

259

278

``

260

279

`# sanitize input

`

261

280

`if is_categorical_dtype(values):

`

262

281

``

263

``

`-

# we are either a Series, CategoricalIndex

`

``

282

`+

# we are either a Series or a CategoricalIndex

`

264

283

`if isinstance(values, (ABCSeries, ABCCategoricalIndex)):

`

265

284

`values = values._values

`

266

285

``

`@@ -271,6 +290,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,

`

271

290

`values = values.get_values()

`

272

291

``

273

292

`elif isinstance(values, (ABCIndexClass, ABCSeries)):

`

``

293

`+

# we'll do inference later

`

274

294

`pass

`

275

295

``

276

296

`else:

`

`@@ -288,12 +308,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,

`

288

308

`# "object" dtype to prevent this. In the end objects will be

`

289

309

`# cast to int/... in the category assignment step.

`

290

310

`if len(values) == 0 or isna(values).any():

`

291

``

`-

dtype = 'object'

`

``

311

`+

sanitize_dtype = 'object'

`

292

312

`else:

`

293

``

`-

dtype = None

`

294

``

`-

values = _sanitize_array(values, None, dtype=dtype)

`

``

313

`+

sanitize_dtype = None

`

``

314

`+

values = _sanitize_array(values, None, dtype=sanitize_dtype)

`

295

315

``

296

``

`-

if categories is None:

`

``

316

`+

if dtype.categories is None:

`

297

317

`try:

`

298

318

`codes, categories = factorize(values, sort=True)

`

299

319

`except TypeError:

`

`@@ -310,7 +330,8 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,

`

310

330

`raise NotImplementedError("> 1 ndim Categorical are not "

`

311

331

`"supported at this time")

`

312

332

``

313

``

`-

if dtype is None or isinstance(dtype, str):

`

``

333

`+

if dtype.categories is None:

`

``

334

`+

# we're inferring from values

`

314

335

`dtype = CategoricalDtype(categories, ordered)

`

315

336

``

316

337

`else:

`

`@@ -321,11 +342,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,

`

321

342

`# - the new one, where each value is also in the categories array

`

322

343

`# (or np.nan)

`

323

344

``

324

``

`-

# make sure that we always have the same type here, no matter what

`

325

``

`-

# we get passed in

`

326

``

`-

if dtype is None or isinstance(dtype, str):

`

327

``

`-

dtype = CategoricalDtype(categories, ordered)

`

328

``

-

329

345

`codes = _get_codes_for_values(values, dtype.categories)

`

330

346

``

331

347

`# TODO: check for old style usage. These warnings should be removed

`