BUG: values
argument ignored when also supplied to index
/columns
in pivot_table
· Issue #61292 · pandas-dev/pandas (original) (raw)
import numpy as np import pandas as pd from pandas import Index, MultiIndex import pandas.testing as tm
def test_pivot_table_values_in_columns():
"""values
arg is shared between values
and columns
."""
data = [
["A", 1, 50, -1],
["B", 1, 100, -2],
["A", 2, 100, -2],
["B", 2, 200, -4],
]
df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"])
result = df.pivot_table(values="value", index="index", columns=["col", "value"])
nan = np.nan
e_data = [
[50.0, nan, 100.0, nan],
[nan, 100.0, nan, 200.0],
]
e_index = Index(data=["A", "B"], name="index")
e_cols = MultiIndex.from_arrays(
arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"]
)
expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols)
tm.assert_frame_equal(left=result, right=expected)
def test_pivot_table_values_in_index():
"""values
arg is shared between values
and index
."""
data = [
["A", 1, 50, -1],
["B", 1, 100, -2],
["A", 2, 100, -2],
["B", 2, 200, -4],
]
df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"])
result = df.pivot_table(values="value", index=["index", "value"], columns="col")
nan = np.nan
e_data = [
[50.0, nan],
[nan, 100.0],
[100.0, nan],
[nan, 200.0],
]
e_index = MultiIndex.from_arrays(
arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], names=["index", "value"]
)
e_cols = Index(data=[1, 2], name="col")
expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols)
tm.assert_frame_equal(left=result, right=expected)
test_pivot_table_values_in_columns() # Fails. test_pivot_table_values_in_index() # Fails.
When the column supplied to values
in pandas.DataFrame.pivot_table is also supplied to index
or columns
, the resulting DataFrame
does not contain the aggregations of the values
argument. If any extra column(s) are present, those columns are aggregated instead of those supplied to values
. This is similar to issue #57876, but the additional columns result in a non-empty DataFrame
.
I would expect the two tests above to pass, i.e., the values
arg is aggregated instead of the non-supplied "extra" column.
Expected output of test_pivot_table_values_in_columns
:
col 1 2
value 50 100 100 200
index
A 50.0 NaN 100.0 NaN
B NaN 100.0 NaN 200.0
Expected output of test_pivot_table_values_in_index
:
col 1 2
index value
A 50 50.0 NaN
100 NaN 100.0
B 100 100.0 NaN
200 NaN 200.0