BUG: crosstab(dropna=False) did not keep np.nan in result (#53205) · pandas-dev/pandas@3cfd868 (original) (raw)

`@@ -286,24 +286,29 @@ def test_margin_dropna4(self):

`

286

286

`# GH 12642

`

287

287

`# _add_margins raises KeyError: Level None not found

`

288

288

`# when margins=True and dropna=False

`

``

289

`+

# GH: 10772: Keep np.nan in result with dropna=False

`

289

290

`df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})

`

290

291

`actual = crosstab(df.a, df.b, margins=True, dropna=False)

`

291

``

`-

expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])

`

292

``

`-

expected.index = Index([1.0, 2.0, "All"], name="a")

`

``

292

`+

expected = DataFrame([[1, 0, 1.0], [1, 3, 4.0], [0, 1, np.nan], [2, 4, 6.0]])

`

``

293

`+

expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")

`

293

294

`expected.columns = Index([3, 4, "All"], name="b")

`

294

295

`tm.assert_frame_equal(actual, expected)

`

295

296

``

296

297

`def test_margin_dropna5(self):

`

``

298

`+

# GH: 10772: Keep np.nan in result with dropna=False

`

297

299

`df = DataFrame(

`

298

300

` {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}

`

299

301

` )

`

300

302

`actual = crosstab(df.a, df.b, margins=True, dropna=False)

`

301

``

`-

expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])

`

302

``

`-

expected.index = Index([1.0, 2.0, "All"], name="a")

`

303

``

`-

expected.columns = Index([3.0, 4.0, "All"], name="b")

`

``

303

`+

expected = DataFrame(

`

``

304

`+

[[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, np.nan], [1, 4, 0, 6.0]]

`

``

305

`+

)

`

``

306

`+

expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")

`

``

307

`+

expected.columns = Index([3.0, 4.0, np.nan, "All"], name="b")

`

304

308

`tm.assert_frame_equal(actual, expected)

`

305

309

``

306

310

`def test_margin_dropna6(self):

`

``

311

`+

# GH: 10772: Keep np.nan in result with dropna=False

`

307

312

`a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)

`

308

313

`b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)

`

309

314

`c = np.array(

`

`@@ -315,13 +320,14 @@ def test_margin_dropna6(self):

`

315

320

` )

`

316

321

`m = MultiIndex.from_arrays(

`

317

322

` [

`

318

``

`-

["one", "one", "two", "two", "All"],

`

319

``

`-

["dull", "shiny", "dull", "shiny", ""],

`

``

323

`+

["one", "one", "two", "two", np.nan, np.nan, "All"],

`

``

324

`+

["dull", "shiny", "dull", "shiny", "dull", "shiny", ""],

`

320

325

` ],

`

321

326

`names=["b", "c"],

`

322

327

` )

`

323

328

`expected = DataFrame(

`

324

``

`-

[[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m

`

``

329

`+

[[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 0, 7]],

`

``

330

`+

columns=m,

`

325

331

` )

`

326

332

`expected.index = Index(["bar", "foo", "All"], name="a")

`

327

333

`tm.assert_frame_equal(actual, expected)

`

`@@ -330,11 +336,23 @@ def test_margin_dropna6(self):

`

330

336

` [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False

`

331

337

` )

`

332

338

`m = MultiIndex.from_arrays(

`

333

``

`-

[["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],

`

``

339

`+

[

`

``

340

`+

["bar", "bar", "bar", "foo", "foo", "foo", "All"],

`

``

341

`+

["one", "two", np.nan, "one", "two", np.nan, ""],

`

``

342

`+

],

`

334

343

`names=["a", "b"],

`

335

344

` )

`

336

345

`expected = DataFrame(

`

337

``

`-

[[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m

`

``

346

`+

[

`

``

347

`+

[1, 0, 1.0],

`

``

348

`+

[1, 0, 1.0],

`

``

349

`+

[0, 0, np.nan],

`

``

350

`+

[2, 0, 2.0],

`

``

351

`+

[1, 1, 2.0],

`

``

352

`+

[0, 1, np.nan],

`

``

353

`+

[5, 2, 7.0],

`

``

354

`+

],

`

``

355

`+

index=m,

`

338

356

` )

`

339

357

`expected.columns = Index(["dull", "shiny", "All"], name="c")

`

340

358

`tm.assert_frame_equal(actual, expected)

`