PERF: GroupBy.any/all operate blockwise instead of column-wise by jbrockmendel · Pull Request #42841 · pandas-dev/pandas (original) (raw)
The idea is to incrementally move all of the relevant functions to operate block-wise, at which point we can also de-duplicate _get_cythonized_result with _cython_agg_general.
Fleshes out the asv benchmark, which currently checks only the single-column case, to measure ncols=1, 2, 5, 10. We take a performance hit on the ncols==1 case and improve all the others (except for some noise).
asv continuous -f 1.01 master HEAD -E virtualenv -b GroupByMethods --append-samples --record-samples
[...]
before after ratio
[a5f8c9a0] [8586317b]
<perf-from_records> <perf-gb>
+ 58.2±5μs 71.7±20μs 1.23 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 1)
+ 76.9±6μs 94.1±20μs 1.22 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 10)
+ 66.7±3μs 81.2±20μs 1.22 groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 2)
+ 65.9±3μs 78.4±20μs 1.19 groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'direct', 2)
+ 68.9±2μs 81.8±20μs 1.19 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 5)
+ 77.0±4μs 91.3±30μs 1.19 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 10)
+ 177±10μs 210±40μs 1.18 groupby.GroupByMethods.time_dtype_as_field('int', 'cumcount', 'transformation', 5)
+ 66.1±2μs 77.3±20μs 1.17 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 2)
+ 57.1±0.9μs 64.7±20μs 1.13 groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'direct', 1)
+ 67.1±10μs 75.9±10μs 1.13 groupby.GroupByMethods.time_dtype_as_field('float', 'all', 'direct', 5)
+ 65.4±3μs 72.6±20μs 1.11 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 2)
+ 56.7±1μs 62.8±20μs 1.11 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'direct', 1)
+ 77.2±2μs 85.4±20μs 1.11 groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 10)
+ 66.0±10μs 73.0±10μs 1.11 groupby.GroupByMethods.time_dtype_as_field('float', 'all', 'direct', 2)
+ 78.2±4μs 86.4±20μs 1.10 groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'direct', 10)
+ 20.8±0s 22.7±0s 1.09 groupby.GroupByMethods.time_dtype_as_group('float', 'describe', 'direct', 10)
+ 69.3±2μs 75.6±20μs 1.09 groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 5)
+ 76.6±10μs 82.6±20μs 1.08 groupby.GroupByMethods.time_dtype_as_field('float', 'all', 'direct', 10)
+ 105±20μs 113±30μs 1.08 groupby.GroupByMethods.time_dtype_as_field('float', 'max', 'direct', 5)
+ 13.4±0s 14.3±0s 1.07 groupby.GroupByMethods.time_dtype_as_group('int', 'describe', 'direct', 10)
+ 59.4±3μs 63.4±20μs 1.07 groupby.GroupByMethods.time_dtype_as_field('int', 'any', 'transformation', 1)
+ 70.2±2μs 74.6±20μs 1.06 groupby.GroupByMethods.time_dtype_as_field('int', 'all', 'transformation', 5)
+ 20.9±0s 22.1±0s 1.06 groupby.GroupByMethods.time_dtype_as_group('float', 'describe', 'transformation', 10)
+ 13.5±0s 14.1±0s 1.05 groupby.GroupByMethods.time_dtype_as_group('int', 'describe', 'transformation', 10)
+ 13.2±0s 13.5±0.2s 1.03 groupby.GroupByMethods.time_dtype_as_group('uint', 'describe', 'direct', 10)
+ 13.2±0s 13.4±0.06s 1.02 groupby.GroupByMethods.time_dtype_as_group('uint', 'describe', 'transformation', 10)
- 438±100ms 431±4ms 0.98 groupby.GroupByMethods.time_dtype_as_group('uint', 'skew', 'transformation', 2)
- 621±100μs 588±20μs 0.95 groupby.GroupByMethods.time_dtype_as_group('uint', 'sum', 'transformation', 10)
- 2.39±0.3ms 2.27±0.04ms 0.95 groupby.GroupByMethods.time_dtype_as_group('uint', 'rank', 'direct', 10)
- 635±100μs 598±10μs 0.94 groupby.GroupByMethods.time_dtype_as_group('uint', 'sum', 'direct', 10)
- 487±100μs 457±10μs 0.94 groupby.GroupByMethods.time_dtype_as_group('uint', 'prod', 'direct', 2)
- 196±60μs 182±4μs 0.93 groupby.GroupByMethods.time_dtype_as_group('uint', 'mean', 'direct', 2)
- 72.5±20μs 65.2±2μs 0.90 groupby.GroupByMethods.time_dtype_as_group('uint', 'cummax', 'direct', 1)
- 320±50μs 287±10μs 0.90 groupby.GroupByMethods.time_dtype_as_group('float', 'var', 'direct', 10)
- 115±30μs 102±3μs 0.89 groupby.GroupByMethods.time_dtype_as_group('datetime', 'last', 'transformation', 1)
- 628±100μs 555±9μs 0.88 groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'direct', 5)
- 164±40μs 145±3μs 0.88 groupby.GroupByMethods.time_dtype_as_group('object', 'tail', 'transformation', 1)
- 259±50μs 229±6μs 0.88 groupby.GroupByMethods.time_dtype_as_group('float', 'mean', 'direct', 5)
- 300±60μs 265±10μs 0.88 groupby.GroupByMethods.time_dtype_as_group('float', 'tail', 'direct', 5)
- 90.0±20μs 79.4±2μs 0.88 groupby.GroupByMethods.time_dtype_as_group('object', 'size', 'direct', 10)
- 174±40μs 153±2μs 0.88 groupby.GroupByMethods.time_dtype_as_group('float', 'tail', 'transformation', 1)
- 744±100μs 655±20μs 0.88 groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'transformation', 10)
- 224±50μs 197±3μs 0.88 groupby.GroupByMethods.time_dtype_as_group('object', 'head', 'transformation', 10)
- 212±50μs 186±7μs 0.88 groupby.GroupByMethods.time_dtype_as_group('float', 'var', 'transformation', 2)
- 283±60μs 248±4μs 0.87 groupby.GroupByMethods.time_dtype_as_group('datetime', 'head', 'transformation', 2)
- 258±50μs 225±4μs 0.87 groupby.GroupByMethods.time_dtype_as_group('float', 'mean', 'transformation', 5)
- 150±30μs 131±4μs 0.87 groupby.GroupByMethods.time_dtype_as_group('uint', 'cummax', 'transformation', 10)
- 326±100μs 284±50μs 0.87 groupby.GroupByMethods.time_dtype_as_field('object', 'ffill', 'direct', 10)
- 214±50μs 186±3μs 0.87 groupby.GroupByMethods.time_dtype_as_group('float', 'var', 'direct', 2)
- 379±90μs 328±9μs 0.87 groupby.GroupByMethods.time_dtype_as_group('float', 'std', 'transformation', 2)
- 739±200μs 639±20μs 0.87 groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'direct', 10)
- 214±50μs 185±3μs 0.86 groupby.GroupByMethods.time_dtype_as_group('float', 'mean', 'transformation', 2)
- 123±30μs 106±4μs 0.86 groupby.GroupByMethods.time_dtype_as_group('uint', 'first', 'transformation', 1)
- 106±30μs 91.0±20μs 0.86 groupby.GroupByMethods.time_dtype_as_field('datetime', 'cummin', 'transformation', 2)
- 155±40μs 132±10μs 0.85 groupby.GroupByMethods.time_dtype_as_group('uint', 'cummin', 'transformation', 10)
- 187±40μs 159±30μs 0.85 groupby.GroupByMethods.time_dtype_as_field('object', 'first', 'transformation', 2)
- 262±70μs 223±3μs 0.85 groupby.GroupByMethods.time_dtype_as_group('uint', 'var', 'transformation', 5)
- 482±100μs 408±7μs 0.85 groupby.GroupByMethods.time_dtype_as_group('float', 'min', 'transformation', 10)
- 392±90μs 330±10μs 0.84 groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'transformation', 5)
- 120±30μs 101±3μs 0.84 groupby.GroupByMethods.time_dtype_as_group('datetime', 'cummin', 'direct', 2)
- 120±40μs 100±2μs 0.83 groupby.GroupByMethods.time_dtype_as_group('datetime', 'cummin', 'transformation', 2)
- 255±90μs 211±40μs 0.83 groupby.GroupByMethods.time_dtype_as_field('object', 'tail', 'transformation', 10)
- 119±30μs 98.6±20μs 0.83 groupby.GroupByMethods.time_dtype_as_field('datetime', 'count', 'transformation', 10)
- 124±30μs 101±20μs 0.82 groupby.GroupByMethods.time_dtype_as_field('datetime', 'cummin', 'direct', 10)
- 1.00±0.2ms 819±50μs 0.81 groupby.GroupByMethods.time_dtype_as_group('object', 'shift', 'transformation', 10)
- 76.5±20μs 62.3±10μs 0.81 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 1)
- 409±90μs 332±5μs 0.81 groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'transformation', 10)
- 399±90μs 324±9μs 0.81 groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'direct', 2)
- 96.7±20μs 77.9±20μs 0.81 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 10)
- 83.5±20μs 67.2±10μs 0.81 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 5)
- 232±70μs 187±30μs 0.81 groupby.GroupByMethods.time_dtype_as_field('object', 'first', 'transformation', 10)
- 156±50μs 125±2μs 0.80 groupby.GroupByMethods.time_dtype_as_group('datetime', 'cummin', 'transformation', 10)
- 78.5±20μs 62.6±10μs 0.80 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 2)
- 85.0±20μs 67.7±10μs 0.80 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'transformation', 2)
- 83.9±20μs 66.8±10μs 0.80 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'transformation', 5)
- 112±30μs 88.8±2μs 0.79 groupby.GroupByMethods.time_dtype_as_group('object', 'last', 'direct', 1)
- 225±60μs 178±40μs 0.79 groupby.GroupByMethods.time_dtype_as_field('object', 'last', 'direct', 10)
- 254±50μs 201±40μs 0.79 groupby.GroupByMethods.time_dtype_as_field('object', 'head', 'direct', 10)
- 90.8±20μs 71.6±20μs 0.79 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'transformation', 5)
- 243±70μs 190±7μs 0.78 groupby.GroupByMethods.time_dtype_as_group('float', 'prod', 'transformation', 1)
- 92.6±20μs 70.9±10μs 0.76 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 5)
- 236±60μs 180±40μs 0.76 groupby.GroupByMethods.time_dtype_as_field('object', 'cumcount', 'direct', 10)
- 4.31±1ms 3.29±0.6ms 0.76 groupby.GroupByMethods.time_dtype_as_group('int', 'quantile', 'transformation', 10)
- 382±100μs 289±50μs 0.76 groupby.GroupByMethods.time_dtype_as_field('object', 'bfill', 'direct', 10)
- 515±200μs 388±10μs 0.75 groupby.GroupByMethods.time_dtype_as_group('uint', 'cumprod', 'transformation', 10)
- 1.31±0.4ms 984±200μs 0.75 groupby.GroupByMethods.time_dtype_as_field('object', 'all', 'transformation', 10)
- 135±40μs 102±20μs 0.75 groupby.GroupByMethods.time_dtype_as_field('float', 'sum', 'transformation', 5)
- 638±200μs 480±20μs 0.75 groupby.GroupByMethods.time_dtype_as_group('float', 'prod', 'direct', 2)
- 251±70μs 189±30μs 0.75 groupby.GroupByMethods.time_dtype_as_field('object', 'count', 'direct', 10)
- 730±200μs 547±40μs 0.75 groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'transformation', 5)
- 819±300μs 612±20μs 0.75 groupby.GroupByMethods.time_dtype_as_group('float', 'prod', 'direct', 10)
- 134±40μs 98.9±20μs 0.74 groupby.GroupByMethods.time_dtype_as_field('float', 'var', 'direct', 10)
- 647±200μs 478±10μs 0.74 groupby.GroupByMethods.time_dtype_as_group('float', 'sum', 'transformation', 2)
- 1.35±0.4ms 993±200μs 0.74 groupby.GroupByMethods.time_dtype_as_field('object', 'all', 'direct', 10)
- 132±40μs 96.9±20μs 0.74 groupby.GroupByMethods.time_dtype_as_field('float', 'var', 'transformation', 10)
- 187±60μs 135±6μs 0.73 groupby.GroupByMethods.time_dtype_as_group('uint', 'cumsum', 'direct', 2)
- 750±200μs 540±20μs 0.72 groupby.GroupByMethods.time_dtype_as_group('float', 'rank', 'transformation', 2)
- 504±100μs 362±90μs 0.72 groupby.GroupByMethods.time_dtype_as_field('object', 'nunique', 'transformation', 10)
- 95.3±30μs 68.4±10μs 0.72 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 2)
- 90.4±30μs 64.7±10μs 0.72 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'direct', 1)
- 103±30μs 73.6±10μs 0.72 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummax', 'direct', 10)
- 115±40μs 76.3±10μs 0.66 groupby.GroupByMethods.time_dtype_as_field('uint', 'cummin', 'transformation', 10)
- 103±20μs 66.9±3μs 0.65 groupby.GroupByMethods.time_dtype_as_group('uint', 'cumsum', 'transformation', 1)
- 339±70μs 120±30μs 0.35 groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'direct', 2)
- 336±90μs 118±10μs 0.35 groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'direct', 2)
- 339±70μs 116±30μs 0.34 groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'transformation', 2)
- 314±60μs 107±20μs 0.34 groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'transformation', 2)
- 335±80μs 108±6μs 0.32 groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'direct', 2)
- 379±100μs 122±40μs 0.32 groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'transformation', 2)
- 341±80μs 109±10μs 0.32 groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'transformation', 2)
- 345±90μs 109±10μs 0.32 groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'transformation', 2)
- 335±80μs 105±10μs 0.31 groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'transformation', 2)
- 328±80μs 102±5μs 0.31 groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'direct', 2)
- 338±90μs 105±5μs 0.31 groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'direct', 2)
- 336±80μs 104±20μs 0.31 groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'direct', 2)
- 330±80μs 102±4μs 0.31 groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'direct', 2)
- 338±90μs 104±4μs 0.31 groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'transformation', 2)
- 345±80μs 105±7μs 0.31 groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'transformation', 2)
- 333±80μs 101±7μs 0.30 groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'transformation', 2)
- 391±100μs 107±5μs 0.27 groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'direct', 2)
- 385±100μs 106±20μs 0.27 groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'direct', 2)
- 436±100μs 119±20μs 0.27 groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'transformation', 5)
- 377±100μs 102±20μs 0.27 groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'transformation', 2)
- 465±100μs 122±30μs 0.26 groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'direct', 5)
- 389±100μs 102±20μs 0.26 groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'direct', 2)
- 437±100μs 113±7μs 0.26 groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'direct', 5)
- 434±90μs 111±10μs 0.26 groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'transformation', 5)
- 456±100μs 116±7μs 0.26 groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'direct', 5)
- 469±100μs 118±10μs 0.25 groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'transformation', 5)
- 457±100μs 114±20μs 0.25 groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'direct', 5)
- 459±100μs 114±6μs 0.25 groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'transformation', 5)
- 448±100μs 110±20μs 0.25 groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'transformation', 5)
- 484±100μs 117±10μs 0.24 groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'direct', 5)
- 464±100μs 111±4μs 0.24 groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'direct', 5)
- 456±100μs 109±7μs 0.24 groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'transformation', 5)
- 459±100μs 108±6μs 0.24 groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'transformation', 5)
- 478±200μs 113±6μs 0.24 groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'direct', 5)
- 461±100μs 108±8μs 0.24 groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'transformation', 5)
- 452±100μs 105±3μs 0.23 groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'direct', 5)
- 528±200μs 121±30μs 0.23 groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'transformation', 5)
- 582±100μs 127±30μs 0.22 groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'transformation', 10)
- 534±100μs 117±30μs 0.22 groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'direct', 5)
- 530±100μs 116±30μs 0.22 groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'direct', 5)
- 628±100μs 133±30μs 0.21 groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'direct', 10)
- 539±200μs 113±30μs 0.21 groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'transformation', 5)
- 645±200μs 130±10μs 0.20 groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'transformation', 10)
- 657±200μs 127±10μs 0.19 groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'direct', 10)
- 621±100μs 120±30μs 0.19 groupby.GroupByMethods.time_dtype_as_group('int', 'all', 'transformation', 10)
- 654±200μs 126±10μs 0.19 groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'transformation', 10)
- 651±200μs 123±10μs 0.19 groupby.GroupByMethods.time_dtype_as_group('uint', 'all', 'transformation', 10)
- 649±200μs 121±8μs 0.19 groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'direct', 10)
- 670±200μs 124±30μs 0.19 groupby.GroupByMethods.time_dtype_as_group('int', 'any', 'direct', 10)
- 663±200μs 122±6μs 0.18 groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'transformation', 10)
- 659±200μs 119±6μs 0.18 groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'transformation', 10)
- 665±200μs 120±9μs 0.18 groupby.GroupByMethods.time_dtype_as_group('datetime', 'all', 'direct', 10)
- 693±200μs 123±3μs 0.18 groupby.GroupByMethods.time_dtype_as_group('uint', 'any', 'direct', 10)
- 663±100μs 117±7μs 0.18 groupby.GroupByMethods.time_dtype_as_group('object', 'any', 'transformation', 10)
- 642±100μs 113±5μs 0.18 groupby.GroupByMethods.time_dtype_as_group('object', 'all', 'direct', 10)
- 755±200μs 125±30μs 0.17 groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'transformation', 10)
- 772±200μs 122±30μs 0.16 groupby.GroupByMethods.time_dtype_as_group('float', 'any', 'direct', 10)
- 762±200μs 120±30μs 0.16 groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'direct', 10)
- 786±200μs 123±6μs 0.16 groupby.GroupByMethods.time_dtype_as_group('datetime', 'any', 'direct', 10)
- 814±200μs 123±30μs 0.15 groupby.GroupByMethods.time_dtype_as_group('float', 'all', 'transformation', 10)