PERF: Index.join to maintain cached attributes in more cases by lukemanley · Pull Request #57023 · pandas-dev/pandas
# Benchmark (IPython session): outer-join two almost identical string indexes
# and then read `is_unique` on the joined result.
import pandas as pd
# 100,000 distinct labels, zero-padded to five digits: "i-00000", "i-00001", ...
data = [f"i-{i:05}" for i in range(100_000)]
dtype = "string[pyarrow_numpy]"
idx1 = pd.Index(data, dtype=dtype)
# Same labels minus the first one, so the two indexes differ in length.
idx2 = pd.Index(data[1:], dtype=dtype)
# With this PR, the `is_unique` lookup at the end of the expression is cached
# on the joined index. `%timeit` is an IPython magic, so run this in
# IPython/Jupyter rather than plain Python.
%timeit idx1.join(idx2, how="outer").is_unique
# Measured timings, main branch first, then this PR:
# 59.1 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) -> main
# 41.9 ms ± 894 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) -> PR