Faster rolling_min/rolling_max · Issue #1504 · pandas-dev/pandas (original) (raw)
Navigation Menu
- GitHub Copilot Write better code with AI
- GitHub Models New Manage and compare prompts
- GitHub Advanced Security Find and fix vulnerabilities
- Actions Automate any workflow
- Codespaces Instant dev environments
- Issues Plan and track work
- Code Review Manage code changes
- Discussions Collaborate outside of code
- Code Search Find more, search less
- Explore
- Pricing
Provide feedback
Saved searches
Use saved searches to filter your results more quickly
Appearance settings
Description
See #50
Using a min-heap
and max-heap
to implement the rolling_min
and rolling_max
functions seems to be about 30% faster than the current implementation.
Something sort of like this:
from heapq import heappush, heapify from numpy import empty, nan cimport numpy as np
cdef extern from "math.h": bint isnan(double x)
cdef np.float64_t NaN = nan
def rolling_min(np.ndarray[np.float64_t] arg, int window):
cdef Py_ssize_t N = len(arg)
cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
cdef list min_heap = []
for i from 0 <= i < N:
x = arg[i]
if isnan(x):
min_heap = []
output[i] = x
else:
heappush(min_heap, arg[i])
if len(min_heap) < window:
output[i] = NaN
else:
output[i] = min_heap[0]
x = arg[i+1-window]
min_heap.remove(x)
heapify(min_heap)
return output
def rolling_max(np.ndarray[np.float64_t] arg, int window):
cdef Py_ssize_t N = len(arg)
cdef np.ndarray[np.double_t] output = empty(N, dtype=float)
cdef list min_heap = []
for i from 0 <= i < N:
x = arg[i]
if isnan(x):
min_heap = []
output[i] = x
else:
heappush(min_heap, -arg[i])
if len(min_heap) < window:
output[i] = NaN
else:
output[i] = -min_heap[0]
x = -arg[i+1-window]
min_heap.remove(x)
heapify(min_heap)
return output