BUG: 2D ndarray of dtype 'object' is always copied upon construction by irgolic · Pull Request #39272 · pandas-dev/pandas (original) (raw)

Here's the full asv run comparing the last commit (your suggestion; the `before` column) with the second-to-last commit (the `after` column). Are there any of these in particular you'd like me to rerun?

       before           after         ratio                                                                                                                  
     [dc2ae20d]       [96dd1b9e]                                                                                                                     
     <2d-object-dont-copy>       <2d-object-dont-copy~1>                                                                                                     
+     4.43±0.01ms       94.2±0.3ms    21.27  indexing.NumericSeriesIndexing.time_getitem_list_like(<class 'pandas.core.indexes.numeric.Float64Index'>, 'unique_monotonic_inc')
+        59.0±3ms       75.8±0.4ms     1.28  inference.ToTimedeltaErrors.time_convert('coerce')                                                              
+        45.8±5ms         58.5±5ms     1.28  gil.ParallelGroupbyMethods.time_loop(2, 'last')                                                                 
+      4.37±0.2ms       5.34±0.3ms     1.22  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'max')                           
+      5.31±0.1ms       6.45±0.5ms     1.21  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'sum')            
+      4.46±0.1ms       5.40±0.3ms     1.21  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'mean')                       
+      5.38±0.2ms       6.51±0.6ms     1.21  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'max')              
+         147±9ms          178±1ms     1.21  inference.ToDatetimeISO8601.time_iso8601_tz_spaceformat                                                         
+        72.1±5ms         86.7±1ms     1.20  inference.ToDatetimeCache.time_dup_string_tzoffset_dates(False)                                                 
+     4.57±0.09ms       5.47±0.3ms     1.20  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'max')                        
+      5.23±0.2ms       6.23±0.4ms     1.19  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'mean')              
+      5.40±0.1ms       6.42±0.5ms     1.19  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'median')         
+      4.29±0.1ms       5.10±0.3ms     1.19  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'mean')                            
+      4.52±0.2ms       5.37±0.2ms     1.19  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'median')                       
+      4.45±0.2ms       5.28±0.4ms     1.19  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'mean')                          
+      15.4±0.6μs       18.2±0.3μs     1.19  series_methods.NanOps.time_func('sum', 1000, 'boolean')                                                         
+      4.39±0.1ms       5.20±0.2ms     1.19  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'max')                             
+      5.25±0.2ms       6.23±0.4ms     1.18  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'median')              
+      5.20±0.2ms       6.16±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'sum')               
+      4.37±0.2ms       5.17±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'median')                          
+     4.53±0.04ms       5.35±0.3ms     1.18  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'min')                          
+      4.35±0.1ms       5.13±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'sum')                             
+      4.40±0.1ms       5.18±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'median')                        
+      5.25±0.2ms       6.17±0.2ms     1.17  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'min')               
+      5.39±0.2ms       6.32±0.4ms     1.17  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'min')              
+        58.8±2ms         68.9±7ms     1.17  inference.ToTimedeltaErrors.time_convert('ignore')                                                              
+      4.42±0.2ms       5.17±0.2ms     1.17  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'min')                           
+      5.33±0.1ms       6.23±0.2ms     1.17  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'min')            
+      5.38±0.3ms       6.27±0.4ms     1.17  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'max')                 
+      4.73±0.3ms       5.51±0.4ms     1.17  rolling.Engine.time_expanding_apply('Series', 'int', <function sum>, 'cython', 'max')                           
+      5.20±0.1ms       6.06±0.3ms     1.16  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'min')                 
+      4.60±0.2ms       5.36±0.3ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'max')                          
+      13.2±0.4μs       15.3±0.3μs     1.16  series_methods.NanOps.time_func('max', 1000, 'boolean')                                                         
+     4.40±0.06ms       5.11±0.4ms     1.16  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'sum')                           
+      4.57±0.1ms       5.29±0.3ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'sum')                          
+     4.67±0.09ms       5.41±0.2ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'mean')                         
+      5.39±0.1ms       6.23±0.5ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'mean')           
+      5.21±0.1ms       6.02±0.4ms     1.16  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'max')               
+     4.59±0.03ms       5.29±0.2ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'median')                     
+      5.31±0.1ms       6.12±0.3ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'mean')             
+     4.55±0.03ms       5.24±0.3ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'sum')                        
+      5.37±0.1ms       6.19±0.4ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'sum')              
+      16.4±0.7μs         18.7±1μs     1.14  series_methods.SearchSorted.time_searchsorted('uint8')                                                          
+      5.26±0.1ms       5.99±0.3ms     1.14  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'median')            
+      4.64±0.1ms       5.29±0.3ms     1.14  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'min')                        
+     1.17±0.01ms      1.33±0.07ms     1.14  hash_functions.NumericSeriesIndexingShuffled.time_loc_slice(<class 'pandas.core.indexes.numeric.UInt64Index'>, 500000)
+     2.58±0.03μs      2.93±0.08μs     1.13  indexing_engines.NumericEngineIndexing.time_get_loc((<class 'pandas._libs.index.Float64Engine'>, <class 'numpy.float64'>), 'monotonic_incr')
+      5.21±0.2ms       5.91±0.3ms     1.13  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'mean')                
+        4.28±0ms       4.84±0.5ms     1.13  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 10000, datetime.timezone(datetime.timedelta(seconds=3600)))
+      4.51±0.2ms       5.10±0.2ms     1.13  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'min')                             
+      5.50±0.2ms       6.21±0.2ms     1.13  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'max')            
+      16.5±0.4μs         18.6±1μs     1.13  series_methods.SearchSorted.time_searchsorted('int16')                                                          
+     2.57±0.01μs       2.89±0.2μs     1.13  indexing_engines.NumericEngineIndexing.time_get_loc((<class 'pandas._libs.index.Int64Engine'>, <class 'numpy.int64'>), 'monotonic_incr')
+       452±0.8ms         508±50ms     1.12  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 1000000, datetime.timezone(datetime.timedelta(seconds=3600)))
+        806±20μs         904±40μs     1.12  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.float64'>, 5.0, <built-in function ne>)   
+     3.20±0.04μs       3.59±0.2μs     1.12  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'start', 'month', 'QS', 3)                       
+      13.2±0.5μs       14.8±0.6μs     1.12  series_methods.NanOps.time_func('min', 1000, 'boolean')                                                         
+         121±5μs          136±4μs     1.12  multiindex_object.GetLoc.time_large_get_loc                                                                     
+      47.6±0.2ms         53.0±1ms     1.11  rolling.TableMethod.time_apply('single')                                                                        
+     4.17±0.02μs       4.64±0.2μs     1.11  categoricals.CategoricalSlicing.time_getitem_scalar('non_monotonic')                                            
+      15.9±0.3ms       17.7±0.2ms     1.11  inference.ToTimedelta.time_convert_string_days                                                                  
+        741±20μs         824±20μs     1.11  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.int64'>, 2, <built-in function eq>)       
+        732±20μs         813±20μs     1.11  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.float64'>, 5.0, <built-in function le>)   
+     3.32±0.03μs       3.67±0.2μs     1.11  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'start', 'year', 'B', 5)                         
+     3.44±0.02μs       3.81±0.3μs     1.11  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(1, 'start', 'year', 'B', 5)                         
+       113±0.7μs          125±1μs     1.11  indexing.NumericSeriesIndexing.time_loc_slice(<class 'pandas.core.indexes.numeric.UInt64Index'>, 'unique_monotonic_inc')
+     4.16±0.02μs       4.60±0.4μs     1.10  categoricals.CategoricalSlicing.time_getitem_scalar('monotonic_incr')                                           
+      13.0±0.2μs       14.3±0.2μs     1.10  series_methods.NanOps.time_func('sum', 1000, 'Int64')                                                           
+        60.3±1ms         66.6±1ms     1.10  rolling.Groupby.time_rolling_int('mean')                                                                        
+      14.4±0.1μs       15.9±0.5μs     1.10  series_methods.NanOps.time_func('min', 1000, 'Int64')                                                           
+        60.7±1ms         67.0±1ms     1.10  rolling.Groupby.time_rolling_int('max')                                                                         
+        61.5±1ms         67.9±1ms     1.10  rolling.Groupby.time_rolling_int('median')                                                                      
+        544±20ns         600±30ns     1.10  index_cached_properties.IndexCache.time_inferred_type('Int64Index')                                             
+     15.8±0.04μs       17.4±0.8μs     1.10  inference.MaybeConvertObjects.time_maybe_convert_objects                                                        
+       437±0.7ms         482±40ms     1.10  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 1000000, None)                                
+        60.5±2ms         66.6±1ms     1.10  rolling.Groupby.time_rolling_int('sum' (0))                                                                     
+     3.36±0.01μs       3.70±0.2μs     1.10  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'end', 'year', 'B', 12)                          
+     3.24±0.04μs       3.57±0.2μs     1.10  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'start', 'quarter', 'QS', 5)                     
+         142±2μs          157±1μs     1.10  indexing.NumericSeriesIndexing.time_loc_scalar(<class 'pandas.core.indexes.numeric.Float64Index'>, 'nonunique_monotonic_inc')
+      41.1±0.3ms         45.2±1ms     1.10  frame_methods.Isnull.time_isnull_obj                                                                            
+     3.47±0.02μs       3.82±0.2μs     1.10  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(1, 'end', 'quarter', 'QS', 12)                      
-      8.68±0.5μs      7.89±0.02μs     0.91  tslibs.resolution.TimeResolution.time_get_resolution('h', 0, datetime.timezone(datetime.timedelta(seconds=3600)))
-      8.79±0.6μs      7.98±0.04μs     0.91  tslibs.resolution.TimeResolution.time_get_resolution('D', 1, datetime.timezone(datetime.timedelta(seconds=3600)))
-        18.0±1μs      16.3±0.05μs     0.91  tslibs.resolution.TimeResolution.time_get_resolution('ns', 1, tzlocal())                                        
-      9.93±0.6μs      9.01±0.02μs     0.91  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 4000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      1.10±0.09s          998±5ms     0.91  arithmetic.OffsetArrayArithmetic.time_add_series_offset(<CustomBusinessMonthEnd> (0))
-      2.71±0.2μs         2.46±0μs     0.91  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('date', 0, None)
-      9.98±0.7μs      9.05±0.03μs     0.91  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 2000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      2.75±0.2μs      2.50±0.02μs     0.91  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 0, tzlocal())
-      2.72±0.2μs         2.46±0μs     0.91  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, None)
-      9.98±0.5μs      9.03±0.09μs     0.91  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 5000, datetime.timezone(datetime.timedelta(seconds=3600)))

-      9.11±0.3μs       8.24±0.3μs     0.90  tslibs.timestamp.TimestampProperties.time_weekday_name(tzlocal(), None)
-      9.91±0.6μs       8.96±0.1μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 12000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      10.0±0.6μs      9.06±0.04μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 3000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      10.1±0.8μs      9.09±0.03μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 8000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      9.94±0.7μs       8.99±0.1μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 10000, datetime.timezone(datetime.timedelta(seconds=3600)))
-        19.8±1μs      17.9±0.05μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(100, 2000, <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>)
-      1.11±0.09s          998±3ms     0.90  arithmetic.OffsetArrayArithmetic.time_add_dti_offset(<CustomBusinessMonthEnd> (0))
-      9.91±0.6μs      8.95±0.08μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 7000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      9.99±0.5μs      9.01±0.09μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 1000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      2.58±0.2μs      2.32±0.01μs     0.90  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 0, datetime.timezone.utc)
-      9.97±0.7μs      8.96±0.06μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 11000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      29.3±0.3ms       26.2±0.5ms     0.90  rolling.Apply.time_rolling('Series', 3, 'int', <built-in function sum>, False)
-      10.1±0.7μs      9.03±0.06μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 9000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      2.60±0.2μs      2.32±0.01μs     0.89  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, datetime.timezone.utc)
-      10.0±0.7μs      8.98±0.04μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 6000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      8.65±0.2μs       7.73±0.1μs     0.89  tslibs.period.TimePeriodArrToDT64Arr.time_periodarray_to_dt64arr(1, 7000)
-      9.99±0.7μs      8.93±0.05μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 2011, datetime.timezone(datetime.timedelta(seconds=3600)))
-        26.7±2μs       23.8±0.2μs     0.89  tslibs.timestamp.TimestampOps.time_normalize(tzlocal())
-      2.77±0.2μs      2.47±0.02μs     0.89  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 0, None)
-      6.29±0.6μs      5.60±0.08μs     0.89  tslibs.timedelta.TimedeltaConstructor.time_from_unit
-      11.6±0.8μs       10.3±0.1μs     0.89  tslibs.timestamp.TimestampOps.time_replace_None(datetime.timezone(datetime.timedelta(seconds=3600)))
-      10.1±0.6μs      8.95±0.06μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 10000, datetime.timezone(datetime.timedelta(seconds=3600)))
-      5.64±0.3μs      5.00±0.04μs     0.89  tslibs.timestamp.TimestampOps.time_normalize(None)
-        58.8±3μs       51.9±0.6μs     0.88  tslibs.timestamp.TimestampOps.time_normalize(tzfile('/usr/share/zoneinfo/Asia/Tokyo'))
-      3.19±0.3μs      2.80±0.02μs     0.88  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('date', 1, None)
-        9.72±1μs      8.54±0.05μs     0.88  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, datetime.timezone(datetime.timedelta(seconds=3600)))
-      2.85±0.2μs      2.50±0.01μs     0.88  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, tzlocal())
-     1.27±0.03ms      1.11±0.06ms     0.87  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.float64'>, 4, <built-in function mul>)
-        15.5±2μs       13.6±0.2μs     0.87  tslibs.timedelta.TimedeltaConstructor.time_from_components
-        8.74±1μs      7.57±0.05μs     0.87  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 6000, <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>)
-      8.05±0.8μs      6.92±0.03μs     0.86  tslibs.timestamp.TimestampOps.time_replace_None(<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>)
-     1.59±0.01ms      1.36±0.01ms     0.85  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(100, 4000, tzlocal())
-      6.61±0.6μs      5.65±0.08μs     0.85  tslibs.timedelta.TimedeltaConstructor.time_from_string
-        24.1±2μs       20.5±0.1μs     0.85  tslibs.timestamp.TimestampOps.time_normalize(datetime.timezone(datetime.timedelta(seconds=3600)))
-     11.7±0.05ms      9.84±0.06ms     0.84  indexing.NumericSeriesIndexing.time_loc_scalar(<class 'pandas.core.indexes.numeric.UInt64Index'>, 'nonunique_monotonic_inc')
-      1.66±0.1ms      1.39±0.07ms     0.84  series_methods.NanOps.time_func('prod', 1000000, 'int8')
-      10.8±0.2μs      9.05±0.04μs     0.84  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 4006, datetime.timezone(datetime.timedelta(seconds=3600)))
-      10.7±0.3μs       8.95±0.2μs     0.83  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 1011, datetime.timezone(datetime.timedelta(seconds=3600)))
-      1.76±0.2ms       1.46±0.3ms     0.83  index_cached_properties.IndexCache.time_engine('MultiIndex')
-      57.4±0.4ms      34.8±0.07ms     0.61  algos.isin.IsinWithArange.time_isin(<class 'numpy.object_'>, 8000, 0)
-        93.1±1ms          400±3μs     0.00  hash_functions.NumericSeriesIndexing.time_loc_slice(<class 'pandas.core.indexes.numeric.Float64Index'>, 1000000)