ENH: Use Welford's method in stats.moments.rolling_var by jaimefrio · Pull Request #6817 · pandas-dev/pandas

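Some evidence for the numerical stability claims, using a 10-element random array with one NaN, a window of 3, and increasingly large constant offsets added to the data (a constant offset should leave the variance unchanged):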

In [1]: import numpy as np
In [2]: import pandas as pd
In [3]: np.random.seed(0)
In [4]: a = np.random.rand(10)
In [5]: a[5] = np.nan

Before this PR (the results already drift at an offset of 1e6 and are meaningless at 1e9):

In [6]: pd.stats.moments.rolling_var(a, 3)
Out[6]:
array([        nan,         nan,  0.00720519,  0.00749899,  0.00835439,
               nan,         nan,         nan,  0.08136806,  0.10003762])

In [7]: pd.stats.moments.rolling_var(a + 1e6, 3)
Out[7]:
array([        nan,         nan,  0.00748698,  0.0078125 ,  0.0094401 ,
               nan,         nan,         nan,  0.08235677,  0.10123698])

In [8]: pd.stats.moments.rolling_var(a + 1e9, 3)
Out[8]:
array([           nan,            nan,   341.33333333,   341.33333333,
         853.33333333,            nan,            nan,            nan,
         853.33333333,  1194.66666667])

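After this PR (same setup as In [1]-[5] above, re-run on the patched build; the results are unchanged at 1e6 and agree to about six decimal places at 1e9. At larger offsets the remaining error comes mostly from float64 rounding of `a + offset` itself rather than from the algorithm: adjacent float64 values are 0.125 apart near 1e15 and 128 apart near 1e18, so at 1e18 every element rounds to the same value and the variance is exactly 0):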

In [6]: pd.stats.moments.rolling_var(a, 3)
Out[6]:
array([        nan,         nan,  0.00720519,  0.00749899,  0.00835439,
               nan,         nan,         nan,  0.08136806,  0.10003762])

In [7]: pd.stats.moments.rolling_var(a + 1e6, 3)
Out[7]:
array([        nan,         nan,  0.00720519,  0.00749899,  0.00835439,
               nan,         nan,         nan,  0.08136806,  0.10003762])

In [8]: pd.stats.moments.rolling_var(a + 1e9, 3)
Out[8]:
array([        nan,         nan,  0.00720519,  0.00749899,  0.0083544 ,
               nan,         nan,         nan,  0.08136812,  0.10003767])

In [9]: pd.stats.moments.rolling_var(a + 1e12, 3)
Out[9]:
array([        nan,         nan,  0.00720175,  0.00749382,  0.00834783,
               nan,         nan,         nan,  0.08135042,  0.10003433])

In [10]: pd.stats.moments.rolling_var(a + 1e15, 3)
Out[10]:
array([       nan,        nan,  0.015625 ,  0.015625 ,  0.015625 ,
              nan,        nan,        nan,  0.0234375,  0.078125 ])

In [11]: pd.stats.moments.rolling_var(a + 1e18, 3)
Out[11]: array([ nan,  nan,   0.,   0.,   0.,  nan,  nan,  nan,   0.,   0.])