BUG: rolling apply on axis=1 produces ValueError · Issue #45912 · pandas-dev/pandas

Pandas version checks

Reproducible Example

import pandas as pd
import numpy as np

df = pd.DataFrame([[1], [2]])
df.rolling(window=1, axis=1).apply(np.sum)

Issue Description

When I run the code in the example, I get a ValueError:

Stack trace

ValueError                                Traceback (most recent call last)
Input In [23], in <module>
----> 1 df.rolling(window=1, axis=1).apply(sum)

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:1842, in Rolling.apply(self, func, raw, engine, engine_kwargs, args, kwargs)
   1821 @doc(
   1822     template_header,
   1823     create_section_header("Parameters"),
   (...)
   1840     kwargs: dict[str, Any] | None = None,
   1841 ):
-> 1842     return super().apply(
   1843         func,
   1844         raw=raw,
   1845         engine=engine,
   1846         engine_kwargs=engine_kwargs,
   1847         args=args,
   1848         kwargs=kwargs,
   1849     )

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:1321, in RollingAndExpandingMixin.apply(self, func, raw, engine, engine_kwargs, args, kwargs)
   1318 else:
   1319     raise ValueError("engine must be either 'numba' or 'cython'")
-> 1321 return self._apply(
   1322     apply_func,
   1323     numba_cache_key=numba_cache_key,
   1324     numba_args=numba_args,
   1325 )

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:590, in BaseWindow._apply(self, func, name, numba_cache_key, numba_args, **kwargs)
    587     return result
    589 if self.method == "single":
--> 590     return self._apply_blockwise(homogeneous_func, name)
    591 else:
    592     return self._apply_tablewise(homogeneous_func, name)

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:462, in BaseWindow._apply_blockwise(self, homogeneous_func, name)
    459 for i, arr in enumerate(obj._iter_column_arrays()):
    460     # GH#42736 operate column-wise instead of block-wise
    461     try:
--> 462         res = hfunc(arr)
    463     except (TypeError, NotImplementedError):
    464         pass

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:452, in BaseWindow._apply_blockwise.<locals>.hfunc(values)
    450 def hfunc(values: ArrayLike) -> ArrayLike:
    451     values = self._prep_values(values)
--> 452     return homogeneous_func(values)

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:582, in BaseWindow._apply.<locals>.homogeneous_func(values)
    579     return func(x, start, end, min_periods, *numba_args)
    581 with np.errstate(all="ignore"):
--> 582     result = calc(values)
    584 if numba_cache_key is not None:
    585     NUMBA_FUNC_CACHE[numba_cache_key] = func

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:579, in BaseWindow._apply.<locals>.homogeneous_func.<locals>.calc(x)
    571 start, end = window_indexer.get_window_bounds(
    572     num_values=len(x),
    573     min_periods=min_periods,
    574     center=self.center,
    575     closed=self.closed,
    576 )
    577 self._check_window_bounds(start, end, len(x))
--> 579 return func(x, start, end, min_periods, *numba_args)

File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:1346, in RollingAndExpandingMixin._generate_cython_apply_func.<locals>.apply_func(values, begin, end, min_periods, raw)
   1344 def apply_func(values, begin, end, min_periods, raw=raw):
   1345     if not raw:
-> 1346         values = Series(values, index=self.obj.index)
   1347     return window_func(values, begin, end, min_periods)

File /usr/local/lib/python3.9/site-packages/pandas/core/series.py:443, in Series.__init__(self, data, index, dtype, name, copy, fastpath)
    441     index = default_index(len(data))
    442 elif is_list_like(data):
--> 443     com.require_length_match(data, index)
    445 # create/copy the manager
    446 if isinstance(data, (SingleBlockManager, SingleArrayManager)):

File /usr/local/lib/python3.9/site-packages/pandas/core/common.py:557, in require_length_match(data, index)
    553 """
    554 Check the length of data matches the length of the index.
    555 """
    556 if len(data) != len(index):
--> 557     raise ValueError(
    558         "Length of values "
    559         f"({len(data)}) "
    560         "does not match length of index "
    561         f"({len(index)})"
    562     )

ValueError: Length of values (1) does not match length of index (2)

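I think the Series constructed in apply_func (rolling.py:1346) should use index=self.obj.columns instead of index=self.obj.index when axis is 1. With axis=1 each array passed to apply_func has one entry per column (1 here), while self.obj.index has one entry per row (2 here), hence the length mismatch.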

Expected Behavior

When applying numpy.sum, I expect the same behavior as for a rolling sum. The rolling sum

import pandas as pd

df = pd.DataFrame([[1], [2]])
df.rolling(window=1, axis=1).sum()

produces a DataFrame of the same shape, with the values converted to floats:

     0
0  1.0
1  2.0

Installed Versions

INSTALLED VERSIONS

commit : bb1f651
python : 3.9.10.final.0
python-bits : 64
OS : Darwin
OS-release : 21.3.0
Version : Darwin Kernel Version 21.3.0: Wed Jan 5 21:37:58 PST 2022; root:xnu-8019.80.24~20/RELEASE_X86_64
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8

pandas : 1.4.0
numpy : 1.22.1
pytz : 2021.3
dateutil : 2.8.2
pip : 22.0.3
setuptools : 60.8.2
Cython : 0.29.27
pytest : 7.0.0
hypothesis : None
sphinx : 4.4.0
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : 2.9.3
jinja2 : 3.0.3
IPython : 8.0.0
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
fsspec : 2022.01.0
gcsfs : None
matplotlib : 3.5.1
numba : None
numexpr : 2.8.1
odfpy : None
openpyxl : 3.0.9
pandas_gbq : 0.16.0
pyarrow : 6.0.1
pyreadstat : None
pyxlsb : None
s3fs : 2022.01.0
scipy : 1.7.3
sqlalchemy : 1.4.31
tables : 3.7.0
tabulate : None
xarray : 0.20.2
xlrd : 2.0.1
xlwt : None
zstandard : None