REGR: perf regression in Series.combine_first by lukemanley · Pull Request #57034 · pandas-dev/pandas
import pandas as pd
import numpy as np
# Number of elements in each full-length benchmark Series.
N = 1_000_000
# Case 1: different indexes.
# s1 keeps every other element of a length-N Series and is then shuffled
# (sample(frac=1.0) returns all of the rows in random order), so it has half
# as many elements as s2 and its index labels are not in sorted order.
s1 = pd.Series(np.random.randn(N))
s1 = s1[::2].sample(frac=1.0)
s2 = pd.Series(np.random.randn(N))
# IPython line magic: time combine_first when the two indexes do not match.
%timeit s1.combine_first(s2)
# Recorded timings: "main" is the main branch, "PR" is with this pull request
# applied (roughly 2.3x faster).
# 523 ms ± 7.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) <- main
# 227 ms ± 4.33 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) <- PR
# Case 2: same index.
# s1 and s2 are both built with N elements and no explicit index; every other
# element of s1 is then set to NaN, so half of s1's values are missing when
# combine_first is called.
s1 = pd.Series(np.random.randn(N))
s1[::2] = np.nan
s2 = pd.Series(np.random.randn(N))
# IPython line magic: time combine_first when both Series share the same index.
%timeit s1.combine_first(s2)
# Recorded timings: "main" is the main branch, "PR" is with this pull request
# applied (roughly 40x faster).
# 282 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) <- main
# 7 ms ± 64.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) <- PR