PERF: avoid copy in _obj_with_exclusions, _selected_obj by jbrockmendel · Pull Request #51090 · pandas-dev/pandas (original) (raw)

import pandas as pd
import numpy as np
import psutil
# No pid is passed here; the handle is kept so memory_info() can be read
# once the objects below have been built.
proc = psutil.Process()

# Fix the seed so the random frame is identical on every run being compared.
np.random.seed(456123)
# 10,000,000 rows x 5 columns named "A".."E", filled from np.random.randn.
df = pd.DataFrame(
    np.random.randn(10_000_000, 5),
    columns=["A", "B", "C", "D", "E"],
)

# One label per row: each of 0..9 repeated 1,000,000 times, giving ten
# contiguous groups of 1,000,000 rows (same length as df).
grps = np.arange(10).repeat(1_000_000)
gb = df.groupby(grps)

# Select the single column "C" using a list. The check that follows looks at
# whether gb2._obj_with_exclusions["C"] shares memory with df["C"].
gb2 = gb[["C"]]

In [3]: np.shares_memory(gb2._obj_with_exclusions["C"], df["C"])
True   # <- PR
False  # <- main

In [5]: proc.memory_info()
Out[5]: pmem(rss=658894848, vms=36076212224, pfaults=169250, pageins=0)  # <- main
Out[5]: pmem(rss=578924544, vms=35993063424, pfaults=149692, pageins=0)  # <- PR