PERF: block-wise arithmetic for frame-with-frame by jbrockmendel · Pull Request #32779 · pandas-dev/pandas

# Benchmark setup: build two 500 x 2000 DataFrames from the same random
# array, re-assign selected columns after casting them to other dtypes,
# then consolidate both frames before timing frame-with-frame addition.
# Presumably the goal is frames holding more than one dtype, so the
# arithmetic has several internal blocks to work through -- confirm.

# 10**6 random samples laid out as 500 rows x 2000 columns, float64.
arr = np.random.randn(10 ** 6).reshape(500, 2000).astype(np.float64)
df = pd.DataFrame(arr)
# Replace column 1000 with a float32 copy of itself, then assign float32
# copies of columns 1000 onward back through iloc.
# NOTE(review): whether the iloc assignment changes the stored dtype of
# those columns or writes the values into the existing float64 storage
# depends on the pandas version -- verify with df.dtypes.
df[1000] = df[1000].astype(np.float32)
df.iloc[:, 1000:] = df.iloc[:, 1000:].astype(np.float32)

# Second frame is built from the same `arr`.
# NOTE(review): whether df and df2 share memory with `arr` depends on the
# DataFrame constructor's copy behaviour in the pandas version used -- confirm.
df2 = pd.DataFrame(arr)
# Same pattern with int64: column 1000 first, then columns 500..1499
# (iloc slice end is exclusive). The int64 cast discards the fractional part.
df2[1000] = df2[1000].astype(np.int64)
df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64)

# _consolidate_inplace is a private pandas method; presumably it merges
# same-dtype blocks so the timings below measure consolidated frames and
# not the fragmentation left by the column assignments -- confirm.
df._consolidate_inplace()
df2._consolidate_inplace()

In [35]: %timeit df + df2                                                                                                                                                                         
572 ms ± 55 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)  # <-- master
113 ms ± 10.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)  # <-- PR

In [38]: %timeit df + df                                                                                                                                                                                
527 ms ± 69.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)  # <-- master
2.51 ms ± 24.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)  # <-- PR