REGR: perf regression in Series.combine_first by lukemanley · Pull Request #57034 · pandas-dev/pandas (original) (raw)

import pandas as pd
import numpy as np

# Number of elements in each randomly generated Series used by the benchmarks.
N = 1_000_000


# Benchmark 1: different indexes.
# s1 keeps every second element of a length-N Series and is then shuffled
# (sample(frac=1.0) returns all rows in random order), so its index is an
# unordered subset of the default 0..N-1 index carried by s2.
s1 = pd.Series(np.random.randn(N))
s1 = s1[::2].sample(frac=1.0)
s2 = pd.Series(np.random.randn(N))
# NOTE: %timeit is an IPython magic; run this in IPython/Jupyter, not plain Python.
%timeit s1.combine_first(s2)

# Recorded timings; "main" is presumably the main branch before the change,
# "PR" the same call with the pull request applied.
# 523 ms ± 7.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)   <- main
# 227 ms ± 4.33 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)  <- PR


# Benchmark 2: same index.
# Both Series carry the default 0..N-1 index; every second value of s1 is
# set to NaN so that combine_first has missing entries to fill from s2.
s1 = pd.Series(np.random.randn(N))
s1[::2] = np.nan
s2 = pd.Series(np.random.randn(N))
# NOTE: %timeit is an IPython magic; run this in IPython/Jupyter, not plain Python.
%timeit s1.combine_first(s2)

# Recorded timings; "main" is presumably the main branch before the change,
# "PR" the same call with the pull request applied.
# 282 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)   <- main
# 7 ms ± 64.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)  <- PR