BUG: rolling apply on axis=1 produces value error · Issue #45912 · pandas-dev/pandas (original) (raw)
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
import numpy as np

df = pd.DataFrame([[1], [2]])
df.rolling(window=1, axis=1).apply(np.sum)
Issue Description
When I run the code in the example, I get a ValueError:
Stack trace
ValueError Traceback (most recent call last)
Input In [23], in <module>
----> 1 df.rolling(window=1, axis=1).apply(sum)
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:1842, in Rolling.apply(self, func, raw, engine, engine_kwargs, args, kwargs)
1821 @doc(
1822 template_header,
1823 create_section_header("Parameters"),
(...)
1840 kwargs: dict[str, Any] | None = None,
1841 ):
-> 1842 return super().apply(
1843 func,
1844 raw=raw,
1845 engine=engine,
1846 engine_kwargs=engine_kwargs,
1847 args=args,
1848 kwargs=kwargs,
1849 )
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:1321, in RollingAndExpandingMixin.apply(self, func, raw, engine, engine_kwargs, args, kwargs)
1318 else:
1319 raise ValueError("engine must be either 'numba' or 'cython'")
-> 1321 return self._apply(
1322 apply_func,
1323 numba_cache_key=numba_cache_key,
1324 numba_args=numba_args,
1325 )
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:590, in BaseWindow._apply(self, func, name, numba_cache_key, numba_args, **kwargs)
587 return result
589 if self.method == "single":
--> 590 return self._apply_blockwise(homogeneous_func, name)
591 else:
592 return self._apply_tablewise(homogeneous_func, name)
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:462, in BaseWindow._apply_blockwise(self, homogeneous_func, name)
459 for i, arr in enumerate(obj._iter_column_arrays()):
460 # GH#42736 operate column-wise instead of block-wise
461 try:
--> 462 res = hfunc(arr)
463 except (TypeError, NotImplementedError):
464 pass
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:452, in BaseWindow._apply_blockwise.<locals>.hfunc(values)
450 def hfunc(values: ArrayLike) -> ArrayLike:
451 values = self._prep_values(values)
--> 452 return homogeneous_func(values)
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:582, in BaseWindow._apply.<locals>.homogeneous_func(values)
579 return func(x, start, end, min_periods, *numba_args)
581 with np.errstate(all="ignore"):
--> 582 result = calc(values)
584 if numba_cache_key is not None:
585 NUMBA_FUNC_CACHE[numba_cache_key] = func
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:579, in BaseWindow._apply.<locals>.homogeneous_func.<locals>.calc(x)
571 start, end = window_indexer.get_window_bounds(
572 num_values=len(x),
573 min_periods=min_periods,
574 center=self.center,
575 closed=self.closed,
576 )
577 self._check_window_bounds(start, end, len(x))
--> 579 return func(x, start, end, min_periods, *numba_args)
File /usr/local/lib/python3.9/site-packages/pandas/core/window/rolling.py:1346, in RollingAndExpandingMixin._generate_cython_apply_func.<locals>.apply_func(values, begin, end, min_periods, raw)
1344 def apply_func(values, begin, end, min_periods, raw=raw):
1345 if not raw:
-> 1346 values = Series(values, index=self.obj.index)
1347 return window_func(values, begin, end, min_periods)
File /usr/local/lib/python3.9/site-packages/pandas/core/series.py:443, in Series.__init__(self, data, index, dtype, name, copy, fastpath)
441 index = default_index(len(data))
442 elif is_list_like(data):
--> 443 com.require_length_match(data, index)
445 # create/copy the manager
446 if isinstance(data, (SingleBlockManager, SingleArrayManager)):
File /usr/local/lib/python3.9/site-packages/pandas/core/common.py:557, in require_length_match(data, index)
553 """
554 Check the length of data matches the length of the index.
555 """
556 if len(data) != len(index):
--> 557 raise ValueError(
558 "Length of values "
559 f"({len(data)}) "
560 "does not match length of index "
561 f"({len(index)})"
562 )
ValueError: Length of values (1) does not match length of index (2)
I think the Series constructed here (in apply_func, where the traceback shows Series(values, index=self.obj.index)) should use index=self.obj.columns when axis is 1.
Expected Behavior
When applying numpy.sum, I expect the same behavior as for a rolling sum. The rolling sum
import pandas as pd

df = pd.DataFrame([[1], [2]])
df.rolling(window=1, axis=1).sum()
produces
     0
0  1.0
1  2.0
Installed Versions
INSTALLED VERSIONS
commit : bb1f651
python : 3.9.10.final.0
python-bits : 64
OS : Darwin
OS-release : 21.3.0
Version : Darwin Kernel Version 21.3.0: Wed Jan 5 21:37:58 PST 2022; root:xnu-8019.80.24~20/RELEASE_X86_64
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.4.0
numpy : 1.22.1
pytz : 2021.3
dateutil : 2.8.2
pip : 22.0.3
setuptools : 60.8.2
Cython : 0.29.27
pytest : 7.0.0
hypothesis : None
sphinx : 4.4.0
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : 2.9.3
jinja2 : 3.0.3
IPython : 8.0.0
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
fsspec : 2022.01.0
gcsfs : None
matplotlib : 3.5.1
numba : None
numexpr : 2.8.1
odfpy : None
openpyxl : 3.0.9
pandas_gbq : 0.16.0
pyarrow : 6.0.1
pyreadstat : None
pyxlsb : None
s3fs : 2022.01.0
scipy : 1.7.3
sqlalchemy : 1.4.31
tables : 3.7.0
tabulate : None
xarray : 0.20.2
xlrd : 2.0.1
xlwt : None
zstandard : None