PERF: Index.join to maintain cached attributes in more cases by lukemanley · Pull Request #57023 · pandas-dev/pandas

# Benchmark snippet: times an outer Index.join of two string indexes followed
# by an `is_unique` access on the result.
# NOTE: `%timeit` below is an IPython line magic, so this must be run in
# IPython/Jupyter rather than as a plain Python script.
import pandas as pd

# 100,000 distinct labels, zero-padded to five digits: "i-00000" .. "i-99999".
data = [f"i-{i:05}" for i in range(100_000)]
# NOTE(review): this dtype string presumably requires pyarrow to be
# installed and a pandas version that recognizes it -- confirm.
dtype = "string[pyarrow_numpy]"

# idx1 holds every label; idx2 holds the same labels minus the first one.
idx1 = pd.Index(data, dtype=dtype)
idx2 = pd.Index(data[1:], dtype=dtype)

# the is_unique call at the end is cached in this PR
# (the timed statement covers both the join and the is_unique access on the
# joined index)
%timeit idx1.join(idx2, how="outer").is_unique

# Recorded timings for the statement above:
# 59.1 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)  -> main
# 41.9 ms ± 894 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)   -> PR