PERF: GroupBy.any/all operate blockwise instead of column-wise by jbrockmendel · Pull Request #42841 · pandas-dev/pandas (original) (raw)

The idea is to incrementally move all of the relevant functions to operate block-wise, at which point we can also de-duplicate _get_cythonized_result with _cython_agg_general.

Fleshes out the asv benchmark, which currently checks only the single-column case, to measure ncols=1, 2, 5, 10. We take a performance hit in the ncols==1 case and improve all the others (excluding some noise).

asv continuous -f 1.01 master HEAD -E virtualenv -b GroupByMethods --append-samples --record-samples
[...]
       before           after         ratio
     [a5f8c9a0]       [8586317b]
     <perf-from_records>       <perf-gb> 
+        58.2±5μs        71.7±20μs     1.23  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 1)
+        76.9±6μs        94.1±20μs     1.22  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 10)
+        66.7±3μs        81.2±20μs     1.22  groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 2)
+        65.9±3μs        78.4±20μs     1.19  groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'direct', 2)
+        68.9±2μs        81.8±20μs     1.19  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 5)
+        77.0±4μs        91.3±30μs     1.19  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 10)
+        177±10μs         210±40μs     1.18  groupby.GroupByMethods.time_dtype_as_field('int', 'cumcount', 'transformation', 5)
+        66.1±2μs        77.3±20μs     1.17  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 2)
+      57.1±0.9μs        64.7±20μs     1.13  groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'direct', 1)
+       67.1±10μs        75.9±10μs     1.13  groupby.GroupByMethods.time_dtype_as_field('float', 'all', 'direct', 5)
+        65.4±3μs        72.6±20μs     1.11  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 2)
+        56.7±1μs        62.8±20μs     1.11  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 1)
+        77.2±2μs        85.4±20μs     1.11  groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 10)
+       66.0±10μs        73.0±10μs     1.11  groupby.GroupByMethods.time_dtype_as_field('float', 'all', 'direct', 2)
+        78.2±4μs        86.4±20μs     1.10  groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'direct', 10)
+         20.8±0s          22.7±0s     1.09  groupby.GroupByMethods.time_dtype_as_group('float', 'describe', 'direct', 10)
+        69.3±2μs        75.6±20μs     1.09  groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 5)
+       76.6±10μs        82.6±20μs     1.08  groupby.GroupByMethods.time_dtype_as_field('float', 'all', 'direct', 10)
+        105±20μs         113±30μs     1.08  groupby.GroupByMethods.time_dtype_as_field('float', 'max', 'direct', 5)
+         13.4±0s          14.3±0s     1.07  groupby.GroupByMethods.time_dtype_as_group('int', 'describe', 'direct', 10)
+        59.4±3μs        63.4±20μs     1.07  groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 1)
+        70.2±2μs        74.6±20μs     1.06  groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 5)
+         20.9±0s          22.1±0s     1.06  groupby.GroupByMethods.time_dtype_as_group('float', 'describe', 'transformation', 10)
+         13.5±0s          14.1±0s     1.05  groupby.GroupByMethods.time_dtype_as_group('int', 'describe', 'transformation', 10)
+         13.2±0s        13.5±0.2s     1.03  groupby.GroupByMethods.time_dtype_as_group('uint', 'describe', 'direct', 10)
+         13.2±0s       13.4±0.06s     1.02  groupby.GroupByMethods.time_dtype_as_group('uint', 'describe', 'transformation', 10)
-       438±100ms          431±4ms     0.98  groupby.GroupByMethods.time_dtype_as_group('uint', 'skew', 'transformation', 2)
-       621±100μs         588±20μs     0.95  groupby.GroupByMethods.time_dtype_as_group('uint', 'sum', 'transformation', 10)
-      2.39±0.3ms      2.27±0.04ms     0.95  groupby.GroupByMethods.time_dtype_as_group('uint', 'rank', 'direct', 10)
-       635±100μs         598±10μs     0.94  groupby.GroupByMethods.time_dtype_as_group('uint', 'sum', 'direct', 10)
-       487±100μs         457±10μs     0.94  groupby.GroupByMethods.time_dtype_as_group('uint', 'prod', 'direct', 2)
-        196±60μs          182±4μs     0.93  groupby.GroupByMethods.time_dtype_as_group('uint', 'mean', 'direct', 2)
-       72.5±20μs         65.2±2μs     0.90  groupby.GroupByMethods.time_dtype_as_group('uint', 'cummax', 'direct', 1)
-        320±50μs         287±10μs     0.90  groupby.GroupByMethods.time_dtype_as_group('float', 'var', 'direct', 10)
-        115±30μs          102±3μs     0.89  groupby.GroupByMethods.time_dtype_as_group('datetime', 'last', 'transformation', 1)
-       628±100μs          555±9μs     0.88  groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'direct', 5)
-        164±40μs          145±3μs     0.88  groupby.GroupByMethods.time_dtype_as_group('object', 'tail', 'transformation', 1)
-        259±50μs          229±6μs     0.88  groupby.GroupByMethods.time_dtype_as_group('float', 'mean', 'direct', 5)
-        300±60μs         265±10μs     0.88  groupby.GroupByMethods.time_dtype_as_group('float', 'tail', 'direct', 5)
-       90.0±20μs         79.4±2μs     0.88  groupby.GroupByMethods.time_dtype_as_group('object', 'size', 'direct', 10)
-        174±40μs          153±2μs     0.88  groupby.GroupByMethods.time_dtype_as_group('float', 'tail', 'transformation', 1)
-       744±100μs         655±20μs     0.88  groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'transformation', 10)
-        224±50μs          197±3μs     0.88  groupby.GroupByMethods.time_dtype_as_group('object', 'head', 'transformation', 10)
-        212±50μs          186±7μs     0.88  groupby.GroupByMethods.time_dtype_as_group('float', 'var', 'transformation', 2)
-        283±60μs          248±4μs     0.87  groupby.GroupByMethods.time_dtype_as_group('datetime', 'head', 'transformation', 2)
-        258±50μs          225±4μs     0.87  groupby.GroupByMethods.time_dtype_as_group('float', 'mean', 'transformation', 5)
-        150±30μs          131±4μs     0.87  groupby.GroupByMethods.time_dtype_as_group('uint', 'cummax', 'transformation', 10)
-       326±100μs         284±50μs     0.87  groupby.GroupByMethods.time_dtype_as_field('object', 'ffill', 'direct', 10)
-        214±50μs          186±3μs     0.87  groupby.GroupByMethods.time_dtype_as_group('float', 'var', 'direct', 2)
-        379±90μs          328±9μs     0.87  groupby.GroupByMethods.time_dtype_as_group('float', 'std', 'transformation', 2)
-       739±200μs         639±20μs     0.87  groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'direct', 10)
-        214±50μs          185±3μs     0.86  groupby.GroupByMethods.time_dtype_as_group('float', 'mean', 'transformation', 2)
-        123±30μs          106±4μs     0.86  groupby.GroupByMethods.time_dtype_as_group('uint', 'first', 'transformation', 1)
-        106±30μs        91.0±20μs     0.86  groupby.GroupByMethods.time_dtype_as_field('datetime', 'cummin', 'transformation', 2)
-        155±40μs         132±10μs     0.85  groupby.GroupByMethods.time_dtype_as_group('uint', 'cummin', 'transformation', 10)
-        187±40μs         159±30μs     0.85  groupby.GroupByMethods.time_dtype_as_field('object', 'first', 'transformation', 2)
-        262±70μs          223±3μs     0.85  groupby.GroupByMethods.time_dtype_as_group('uint', 'var', 'transformation', 5)
-       482±100μs          408±7μs     0.85  groupby.GroupByMethods.time_dtype_as_group('float', 'min', 'transformation', 10)
-        392±90μs         330±10μs     0.84  groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'transformation', 5)
-        120±30μs          101±3μs     0.84  groupby.GroupByMethods.time_dtype_as_group('datetime', 'cummin', 'direct', 2)
-        120±40μs          100±2μs     0.83  groupby.GroupByMethods.time_dtype_as_group('datetime', 'cummin', 'transformation', 2)
-        255±90μs         211±40μs     0.83  groupby.GroupByMethods.time_dtype_as_field('object', 'tail', 'transformation', 10)
-        119±30μs        98.6±20μs     0.83  groupby.GroupByMethods.time_dtype_as_field('datetime', 'count', 'transformation', 10)
-        124±30μs         101±20μs     0.82  groupby.GroupByMethods.time_dtype_as_field('datetime', 'cummin', 'direct', 10)
-      1.00±0.2ms         819±50μs     0.81  groupby.GroupByMethods.time_dtype_as_group('object', 'shift', 'transformation', 10)
-       76.5±20μs        62.3±10μs     0.81  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 1)
-        409±90μs          332±5μs     0.81  groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'transformation', 10)
-        399±90μs          324±9μs     0.81  groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'direct', 2)
-       96.7±20μs        77.9±20μs     0.81  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 10)
-       83.5±20μs        67.2±10μs     0.81  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 5)
-        232±70μs         187±30μs     0.81  groupby.GroupByMethods.time_dtype_as_field('object', 'first', 'transformation', 10)
-        156±50μs          125±2μs     0.80  groupby.GroupByMethods.time_dtype_as_group('datetime', 'cummin', 'transformation', 10)
-       78.5±20μs        62.6±10μs     0.80  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 2)
-       85.0±20μs        67.7±10μs     0.80  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'transformation', 2)
-       83.9±20μs        66.8±10μs     0.80  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'transformation', 5)
-        112±30μs         88.8±2μs     0.79  groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'direct', 1)
-        225±60μs         178±40μs     0.79  groupby.GroupByMethods.time_dtype_as_field('object', 'last', 'direct', 10)
-        254±50μs         201±40μs     0.79  groupby.GroupByMethods.time_dtype_as_field('object', 'head', 'direct', 10)
-       90.8±20μs        71.6±20μs     0.79  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'transformation', 5)
-        243±70μs          190±7μs     0.78  groupby.GroupByMethods.time_dtype_as_group('float', 'prod', 'transformation', 1)
-       92.6±20μs        70.9±10μs     0.76  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 5)
-        236±60μs         180±40μs     0.76  groupby.GroupByMethods.time_dtype_as_field('object', 'cumcount', 'direct', 10)
-        4.31±1ms       3.29±0.6ms     0.76  groupby.GroupByMethods.time_dtype_as_group('int', 'quantile', 'transformation', 10)
-       382±100μs         289±50μs     0.76  groupby.GroupByMethods.time_dtype_as_field('object', 'bfill', 'direct', 10)
-       515±200μs         388±10μs     0.75  groupby.GroupByMethods.time_dtype_as_group('uint', 'cumprod', 'transformation', 10)
-      1.31±0.4ms        984±200μs     0.75  groupby.GroupByMethods.time_dtype_as_field('object', 'all', 'transformation', 10)
-        135±40μs         102±20μs     0.75  groupby.GroupByMethods.time_dtype_as_field('float', 'sum', 'transformation', 5)
-       638±200μs         480±20μs     0.75  groupby.GroupByMethods.time_dtype_as_group('float', 'prod', 'direct', 2)
-        251±70μs         189±30μs     0.75  groupby.GroupByMethods.time_dtype_as_field('object', 'count', 'direct', 10)
-       730±200μs         547±40μs     0.75  groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'transformation', 5)
-       819±300μs         612±20μs     0.75  groupby.GroupByMethods.time_dtype_as_group('float', 'prod', 'direct', 10)
-        134±40μs        98.9±20μs     0.74  groupby.GroupByMethods.time_dtype_as_field('float', 'var', 'direct', 10)
-       647±200μs         478±10μs     0.74  groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'transformation', 2)
-      1.35±0.4ms        993±200μs     0.74  groupby.GroupByMethods.time_dtype_as_field('object', 'all', 'direct', 10)
-        132±40μs        96.9±20μs     0.74  groupby.GroupByMethods.time_dtype_as_field('float', 'var', 'transformation', 10)
-        187±60μs          135±6μs     0.73  groupby.GroupByMethods.time_dtype_as_group('uint', 'cumsum', 'direct', 2)
-       750±200μs         540±20μs     0.72  groupby.GroupByMethods.time_dtype_as_group('float', 'rank', 'transformation', 2)
-       504±100μs         362±90μs     0.72  groupby.GroupByMethods.time_dtype_as_field('object', 'nunique', 'transformation', 10)
-       95.3±30μs        68.4±10μs     0.72  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 2)
-       90.4±30μs        64.7±10μs     0.72  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 1)
-        103±30μs        73.6±10μs     0.72  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 10)
-        115±40μs        76.3±10μs     0.66  groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'transformation', 10)
-        103±20μs         66.9±3μs     0.65  groupby.GroupByMethods.time_dtype_as_group('uint', 'cumsum', 'transformation', 1)
-        339±70μs         120±30μs     0.35  groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'direct', 2)
-        336±90μs         118±10μs     0.35  groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'direct', 2)
-        339±70μs         116±30μs     0.34  groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'transformation', 2)
-        314±60μs         107±20μs     0.34  groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'transformation', 2)
-        335±80μs          108±6μs     0.32  groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'direct', 2)
-       379±100μs         122±40μs     0.32  groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'transformation', 2)
-        341±80μs         109±10μs     0.32  groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'transformation', 2)
-        345±90μs         109±10μs     0.32  groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'transformation', 2)
-        335±80μs         105±10μs     0.31  groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'transformation', 2)
-        328±80μs          102±5μs     0.31  groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'direct', 2)
-        338±90μs          105±5μs     0.31  groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'direct', 2)
-        336±80μs         104±20μs     0.31  groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'direct', 2)
-        330±80μs          102±4μs     0.31  groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'direct', 2)
-        338±90μs          104±4μs     0.31  groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'transformation', 2)
-        345±80μs          105±7μs     0.31  groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'transformation', 2)
-        333±80μs          101±7μs     0.30  groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'transformation', 2)
-       391±100μs          107±5μs     0.27  groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'direct', 2)
-       385±100μs         106±20μs     0.27  groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'direct', 2)
-       436±100μs         119±20μs     0.27  groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'transformation', 5)
-       377±100μs         102±20μs     0.27  groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'transformation', 2)
-       465±100μs         122±30μs     0.26  groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'direct', 5)
-       389±100μs         102±20μs     0.26  groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'direct', 2)
-       437±100μs          113±7μs     0.26  groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'direct', 5)
-        434±90μs         111±10μs     0.26  groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'transformation', 5)
-       456±100μs          116±7μs     0.26  groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'direct', 5)
-       469±100μs         118±10μs     0.25  groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'transformation', 5)
-       457±100μs         114±20μs     0.25  groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'direct', 5)
-       459±100μs          114±6μs     0.25  groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'transformation', 5)
-       448±100μs         110±20μs     0.25  groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'transformation', 5)
-       484±100μs         117±10μs     0.24  groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'direct', 5)
-       464±100μs          111±4μs     0.24  groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'direct', 5)
-       456±100μs          109±7μs     0.24  groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'transformation', 5)
-       459±100μs          108±6μs     0.24  groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'transformation', 5)
-       478±200μs          113±6μs     0.24  groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'direct', 5)
-       461±100μs          108±8μs     0.24  groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'transformation', 5)
-       452±100μs          105±3μs     0.23  groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'direct', 5)
-       528±200μs         121±30μs     0.23  groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'transformation', 5)
-       582±100μs         127±30μs     0.22  groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'transformation', 10)
-       534±100μs         117±30μs     0.22  groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'direct', 5)
-       530±100μs         116±30μs     0.22  groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'direct', 5)
-       628±100μs         133±30μs     0.21  groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'direct', 10)
-       539±200μs         113±30μs     0.21  groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'transformation', 5)
-       645±200μs         130±10μs     0.20  groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'transformation', 10)
-       657±200μs         127±10μs     0.19  groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'direct', 10)
-       621±100μs         120±30μs     0.19  groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'transformation', 10)
-       654±200μs         126±10μs     0.19  groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'transformation', 10)
-       651±200μs         123±10μs     0.19  groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'transformation', 10)
-       649±200μs          121±8μs     0.19  groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'direct', 10)
-       670±200μs         124±30μs     0.19  groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'direct', 10)
-       663±200μs          122±6μs     0.18  groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'transformation', 10)
-       659±200μs          119±6μs     0.18  groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'transformation', 10)
-       665±200μs          120±9μs     0.18  groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'direct', 10)
-       693±200μs          123±3μs     0.18  groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'direct', 10)
-       663±100μs          117±7μs     0.18  groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'transformation', 10)
-       642±100μs          113±5μs     0.18  groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'direct', 10)
-       755±200μs         125±30μs     0.17  groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'transformation', 10)
-       772±200μs         122±30μs     0.16  groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'direct', 10)
-       762±200μs         120±30μs     0.16  groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'direct', 10)
-       786±200μs          123±6μs     0.16  groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'direct', 10)
-       814±200μs         123±30μs     0.15  groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'transformation', 10)