BUG: ewma() weights incorrect when some values are missing (GH7543) · pandas-dev/pandas@24b309f (original) (raw)

`@@ -89,6 +89,9 @@

`

89

89

` imbalance in relative weightings (viewing EWMA as a moving average)

`

90

90

`how : string, default 'mean'

`

91

91

` Method for down- or re-sampling

`

``

92

`+

ignore_na : boolean, default False

`

``

93

`+

Ignore missing values when calculating weights;

`

``

94

`+

specify True to reproduce pre-0.15.0 behavior

`

92

95

`"""

`

93

96

``

94

97

`_ewm_notes = r"""

`

`@@ -420,12 +423,12 @@ def _get_center_of_mass(com, span, halflife):

`

420

423

`_type_of_input_retval, _ewm_notes)

`

421

424

`@Appender(_doc_template)

`

422

425

`def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,

`

423

``

`-

adjust=True, how=None):

`

``

426

`+

adjust=True, how=None, ignore_na=False):

`

424

427

`com = _get_center_of_mass(com, span, halflife)

`

425

428

`arg = _conv_timerule(arg, freq, how)

`

426

429

``

427

430

`def _ewma(v):

`

428

``

`-

result = algos.ewma(v, com, int(adjust))

`

``

431

`+

result = algos.ewma(v, com, int(adjust), int(ignore_na))

`

429

432

`first_index = _first_valid_index(v)

`

430

433

`result[first_index: first_index + min_periods] = NaN

`

431

434

`return result

`

`@@ -444,11 +447,11 @@ def _first_valid_index(arr):

`

444

447

`_ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes)

`

445

448

`@Appender(_doc_template)

`

446

449

`def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,

`

447

``

`-

freq=None, how=None):

`

``

450

`+

freq=None, how=None, ignore_na=False):

`

448

451

`com = _get_center_of_mass(com, span, halflife)

`

449

452

`arg = _conv_timerule(arg, freq, how)

`

450

``

`-

moment2nd = ewma(arg * arg, com=com, min_periods=min_periods)

`

451

``

`-

moment1st = ewma(arg, com=com, min_periods=min_periods)

`

``

453

`+

moment2nd = ewma(arg * arg, com=com, min_periods=min_periods, ignore_na=ignore_na)

`

``

454

`+

moment1st = ewma(arg, com=com, min_periods=min_periods, ignore_na=ignore_na)

`

452

455

``

453

456

`result = moment2nd - moment1st ** 2

`

454

457

`if not bias:

`

`@@ -460,9 +463,10 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,

`

460

463

`@Substitution("Exponentially-weighted moving std", _unary_arg,

`

461

464

`_ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes)

`

462

465

`@Appender(_doc_template)

`

463

``

`-

def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False):

`

``

466

`+

def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,

`

``

467

`+

ignore_na=False):

`

464

468

`result = ewmvar(arg, com=com, span=span, halflife=halflife,

`

465

``

`-

min_periods=min_periods, bias=bias)

`

``

469

`+

min_periods=min_periods, bias=bias, ignore_na=ignore_na)

`

466

470

`return _zsqrt(result)

`

467

471

``

468

472

`ewmvol = ewmstd

`

`@@ -472,7 +476,7 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False):

`

472

476

`_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)

`

473

477

`@Appender(_doc_template)

`

474

478

`def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,

`

475

``

`-

bias=False, freq=None, pairwise=None, how=None):

`

``

479

`+

bias=False, freq=None, pairwise=None, how=None, ignore_na=False):

`

476

480

`if arg2 is None:

`

477

481

`arg2 = arg1

`

478

482

`pairwise = True if pairwise is None else pairwise

`

`@@ -484,7 +488,8 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,

`

484

488

`arg2 = _conv_timerule(arg2, freq, how)

`

485

489

``

486

490

`def _get_ewmcov(X, Y):

`

487

``

`-

mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)

`

``

491

`+

mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods,

`

``

492

`+

ignore_na=ignore_na)

`

488

493

`return (mean(X * Y) - mean(X) * mean(Y))

`

489

494

`result = _flex_binary_moment(arg1, arg2, _get_ewmcov,

`

490

495

`pairwise=bool(pairwise))

`

`@@ -499,7 +504,7 @@ def _get_ewmcov(X, Y):

`

499

504

`_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)

`

500

505

`@Appender(_doc_template)

`

501

506

`def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,

`

502

``

`-

freq=None, pairwise=None, how=None):

`

``

507

`+

freq=None, pairwise=None, how=None, ignore_na=False):

`

503

508

`if arg2 is None:

`

504

509

`arg2 = arg1

`

505

510

`pairwise = True if pairwise is None else pairwise

`

`@@ -511,9 +516,10 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,

`

511

516

`arg2 = _conv_timerule(arg2, freq, how)

`

512

517

``

513

518

`def _get_ewmcorr(X, Y):

`

514

``

`-

mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)

`

``

519

`+

mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods,

`

``

520

`+

ignore_na=ignore_na)

`

515

521

`var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods,

`

516

``

`-

bias=True)

`

``

522

`+

bias=True, ignore_na=ignore_na)

`

517

523

`return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y))

`

518

524

`result = _flex_binary_moment(arg1, arg2, _get_ewmcorr,

`

519

525

`pairwise=bool(pairwise))

`