BUG: values argument ignored when also supplied to index/columns in pivot_table · Issue #61292 · pandas-dev/pandas (original) (raw)

import numpy as np import pandas as pd from pandas import Index, MultiIndex import pandas.testing as tm

def test_pivot_table_values_in_columns(): """values arg is shared between values and columns.""" data = [ ["A", 1, 50, -1], ["B", 1, 100, -2], ["A", 2, 100, -2], ["B", 2, 200, -4], ] df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"]) result = df.pivot_table(values="value", index="index", columns=["col", "value"]) nan = np.nan e_data = [ [50.0, nan, 100.0, nan], [nan, 100.0, nan, 200.0], ] e_index = Index(data=["A", "B"], name="index") e_cols = MultiIndex.from_arrays( arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"] ) expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) tm.assert_frame_equal(left=result, right=expected)

def test_pivot_table_values_in_index(): """values arg is shared between values and index.""" data = [ ["A", 1, 50, -1], ["B", 1, 100, -2], ["A", 2, 100, -2], ["B", 2, 200, -4], ] df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"]) result = df.pivot_table(values="value", index=["index", "value"], columns="col") nan = np.nan e_data = [ [50.0, nan], [nan, 100.0], [100.0, nan], [nan, 200.0], ] e_index = MultiIndex.from_arrays( arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], names=["index", "value"] ) e_cols = Index(data=[1, 2], name="col") expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) tm.assert_frame_equal(left=result, right=expected)

test_pivot_table_values_in_columns() # Fails. test_pivot_table_values_in_index() # Fails.

When the column supplied to values in pandas.DataFrame.pivot_table is also supplied to index or columns, the resulting DataFrame does not contain the aggregations of the values argument. If any extra column(s) are present, those columns are aggregated instead of those supplied to values. This is similar to issue #57876, but the additional columns result in a non-empty DataFrame.

I would expect the two tests above to pass, i.e., the values arg is aggregated instead of the non-supplied "extra" column.

Expected output of test_pivot_table_values_in_columns:

col 1 2
value 50 100 100 200 index
A 50.0 NaN 100.0 NaN B NaN 100.0 NaN 200.0

Expected output of test_pivot_table_values_in_index:

col 1 2 index value
A 50 50.0 NaN 100 NaN 100.0 B 100 100.0 NaN 200 NaN 200.0