BUG: crosstab(dropna=False) did not keep np.nan in result (#53205) · pandas-dev/pandas@3cfd868 (original) (raw)
`@@ -286,24 +286,29 @@ def test_margin_dropna4(self):
`
286
286
`# GH 12642
`
287
287
`# _add_margins raises KeyError: Level None not found
`
288
288
`# when margins=True and dropna=False
`
``
289
`+
# GH: 10772: Keep np.nan in result with dropna=False
`
289
290
`df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
`
290
291
`actual = crosstab(df.a, df.b, margins=True, dropna=False)
`
291
``
`-
expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
`
292
``
`-
expected.index = Index([1.0, 2.0, "All"], name="a")
`
``
292
`+
expected = DataFrame([[1, 0, 1.0], [1, 3, 4.0], [0, 1, np.nan], [2, 4, 6.0]])
`
``
293
`+
expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
`
293
294
`expected.columns = Index([3, 4, "All"], name="b")
`
294
295
`tm.assert_frame_equal(actual, expected)
`
295
296
``
296
297
`def test_margin_dropna5(self):
`
``
298
`+
# GH: 10772: Keep np.nan in result with dropna=False
`
297
299
`df = DataFrame(
`
298
300
` {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
`
299
301
` )
`
300
302
`actual = crosstab(df.a, df.b, margins=True, dropna=False)
`
301
``
`-
expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
`
302
``
`-
expected.index = Index([1.0, 2.0, "All"], name="a")
`
303
``
`-
expected.columns = Index([3.0, 4.0, "All"], name="b")
`
``
303
`+
expected = DataFrame(
`
``
304
`+
[[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, np.nan], [1, 4, 0, 6.0]]
`
``
305
`+
)
`
``
306
`+
expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
`
``
307
`+
expected.columns = Index([3.0, 4.0, np.nan, "All"], name="b")
`
304
308
`tm.assert_frame_equal(actual, expected)
`
305
309
``
306
310
`def test_margin_dropna6(self):
`
``
311
`+
# GH: 10772: Keep np.nan in result with dropna=False
`
307
312
`a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
`
308
313
`b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
`
309
314
`c = np.array(
`
`@@ -315,13 +320,14 @@ def test_margin_dropna6(self):
`
315
320
` )
`
316
321
`m = MultiIndex.from_arrays(
`
317
322
` [
`
318
``
`-
["one", "one", "two", "two", "All"],
`
319
``
`-
["dull", "shiny", "dull", "shiny", ""],
`
``
323
`+
["one", "one", "two", "two", np.nan, np.nan, "All"],
`
``
324
`+
["dull", "shiny", "dull", "shiny", "dull", "shiny", ""],
`
320
325
` ],
`
321
326
`names=["b", "c"],
`
322
327
` )
`
323
328
`expected = DataFrame(
`
324
``
`-
[[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m
`
``
329
`+
[[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 0, 7]],
`
``
330
`+
columns=m,
`
325
331
` )
`
326
332
`expected.index = Index(["bar", "foo", "All"], name="a")
`
327
333
`tm.assert_frame_equal(actual, expected)
`
`@@ -330,11 +336,23 @@ def test_margin_dropna6(self):
`
330
336
` [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False
`
331
337
` )
`
332
338
`m = MultiIndex.from_arrays(
`
333
``
`-
[["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
`
``
339
`+
[
`
``
340
`+
["bar", "bar", "bar", "foo", "foo", "foo", "All"],
`
``
341
`+
["one", "two", np.nan, "one", "two", np.nan, ""],
`
``
342
`+
],
`
334
343
`names=["a", "b"],
`
335
344
` )
`
336
345
`expected = DataFrame(
`
337
``
`-
[[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m
`
``
346
`+
[
`
``
347
`+
[1, 0, 1.0],
`
``
348
`+
[1, 0, 1.0],
`
``
349
`+
[0, 0, np.nan],
`
``
350
`+
[2, 0, 2.0],
`
``
351
`+
[1, 1, 2.0],
`
``
352
`+
[0, 1, np.nan],
`
``
353
`+
[5, 2, 7.0],
`
``
354
`+
],
`
``
355
`+
index=m,
`
338
356
` )
`
339
357
`expected.columns = Index(["dull", "shiny", "All"], name="c")
`
340
358
`tm.assert_frame_equal(actual, expected)
`