ENH: Use Welford's method in stats.moments.rolling_var by jaimefrio · Pull Request #6817 · pandas-dev/pandas
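Some evidence for the numerical stability claims. Adding a constant offset to the data should leave the rolling variance unchanged, so every result below should ideally match the un-shifted `Out[6]`: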
In [1]: import numpy as np
In [2]: import pandas as pd
In [3]: np.random.seed(0)
In [4]: a = np.random.rand(10)
In [5]: a[5] = np.nan
Before this PR (note the visible error at an offset of 1e6 and the meaningless values at 1e9):
In [6]: pd.stats.moments.rolling_var(a, 3)
Out[6]:
array([ nan, nan, 0.00720519, 0.00749899, 0.00835439,
nan, nan, nan, 0.08136806, 0.10003762])
In [7]: pd.stats.moments.rolling_var(a + 1e6, 3)
Out[7]:
array([ nan, nan, 0.00748698, 0.0078125 , 0.0094401 ,
nan, nan, nan, 0.08235677, 0.10123698])
In [8]: pd.stats.moments.rolling_var(a + 1e9, 3)
Out[8]:
array([ nan, nan, 341.33333333, 341.33333333,
853.33333333, nan, nan, nan,
853.33333333, 1194.66666667])
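After this PR (results stay close to the un-shifted values through an offset of 1e12; from 1e15 onward the float64 rounding of the shifted inputs themselves dominates, so no algorithm could recover the variance):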
In [6]: pd.stats.moments.rolling_var(a, 3)
Out[6]:
array([ nan, nan, 0.00720519, 0.00749899, 0.00835439,
nan, nan, nan, 0.08136806, 0.10003762])
In [7]: pd.stats.moments.rolling_var(a + 1e6, 3)
Out[7]:
array([ nan, nan, 0.00720519, 0.00749899, 0.00835439,
nan, nan, nan, 0.08136806, 0.10003762])
In [8]: pd.stats.moments.rolling_var(a + 1e9, 3)
Out[8]:
array([ nan, nan, 0.00720519, 0.00749899, 0.0083544 ,
nan, nan, nan, 0.08136812, 0.10003767])
In [9]: pd.stats.moments.rolling_var(a + 1e12, 3)
Out[9]:
array([ nan, nan, 0.00720175, 0.00749382, 0.00834783,
nan, nan, nan, 0.08135042, 0.10003433])
In [10]: pd.stats.moments.rolling_var(a + 1e15, 3)
Out[10]:
array([ nan, nan, 0.015625 , 0.015625 , 0.015625 ,
nan, nan, nan, 0.0234375, 0.078125 ])
In [11]: pd.stats.moments.rolling_var(a + 1e18, 3)
Out[11]: array([ nan, nan, 0., 0., 0., nan, nan, nan, 0., 0.])