Faster rolling_min/rolling_max · Issue #1504 · pandas-dev/pandas (original) (raw)

Skip to content

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Sign up

Appearance settings

@mrjbq7

Description

@mrjbq7

See #50

Using a min-heap and max-heap to implement the rolling_min and rolling_max functions seems to be about 30% faster than the current implementation.

Something sort of like this:

from heapq import heappush, heapify from numpy import empty, nan cimport numpy as np

cdef extern from "math.h": bint isnan(double x)

cdef np.float64_t NaN = nan

def rolling_min(np.ndarray[np.float64_t] arg, int window):

cdef Py_ssize_t N = len(arg)
cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
cdef list min_heap = []

for i from 0 <= i < N:
    x = arg[i]
    if isnan(x):
        min_heap = []
        output[i] = x
    else:
        heappush(min_heap, arg[i])
        if len(min_heap) < window:
            output[i] = NaN
        else:
            output[i] = min_heap[0]
            x = arg[i+1-window]
            min_heap.remove(x)
            heapify(min_heap)
return output

def rolling_max(np.ndarray[np.float64_t] arg, int window):

cdef Py_ssize_t N = len(arg)
cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
cdef list min_heap = []

for i from 0 <= i < N:
    x = arg[i]
    if isnan(x):
        min_heap = []
        output[i] = x
    else:
        heappush(min_heap, -arg[i])
        if len(min_heap) < window:
            output[i] = NaN
        else:
            output[i] = -min_heap[0]
            x = -arg[i+1-window]
            min_heap.remove(x)
            heapify(min_heap)
return output