Fix O(N²) in add_const() after constant folding moved to CFG · zSirius/cpython@120e518 (original) (raw)
`@@ -6,6 +6,7 @@
`
6
6
`#include "pycore_intrinsics.h"
`
7
7
`#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
`
8
8
`#include "pycore_long.h" // _PY_IS_SMALL_INT()
`
``
9
`+
#include "pycore_hashtable.h" // _Py_hashtable_t
`
9
10
``
10
11
`#include "pycore_opcode_utils.h"
`
11
12
`#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
`
`@@ -1325,30 +1326,43 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)
`
1325
1326
``
1326
1327
`// Steals a reference to newconst.
`
1327
1328
`static int
`
1328
``
`-
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
`
``
1329
`+
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
`
``
1330
`+
_Py_hashtable_t *consts_index)
`
1329
1331
`{
`
1330
1332
`if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
`
1331
1333
`Py_DECREF(newconst);
`
1332
1334
`return -1;
`
1333
1335
` }
`
1334
1336
``
1335
``
`-
Py_ssize_t index;
`
1336
``
`-
for (index = 0; index < PyList_GET_SIZE(consts); index++) {
`
1337
``
`-
if (PyList_GET_ITEM(consts, index) == newconst) {
`
1338
``
`-
break;
`
1339
``
`-
}
`
``
1337
`+
/* O(1) lookup via pointer-keyed hashtable (replaces linear search). */
`
``
1338
`+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
`
``
1339
`+
if (entry != NULL) {
`
``
1340
`+
/* Already exists */
`
``
1341
`+
Py_DECREF(newconst);
`
``
1342
`+
return (int)(uintptr_t)entry->value;
`
1340
1343
` }
`
1341
``
`-
if (index == PyList_GET_SIZE(consts)) {
`
1342
``
`-
if ((size_t)index >= (size_t)INT_MAX - 1) {
`
1343
``
`-
PyErr_SetString(PyExc_OverflowError, "too many constants");
`
1344
``
`-
Py_DECREF(newconst);
`
1345
``
`-
return -1;
`
1346
``
`-
}
`
1347
``
`-
if (PyList_Append(consts, newconst)) {
`
1348
``
`-
Py_DECREF(newconst);
`
1349
``
`-
return -1;
`
1350
``
`-
}
`
``
1344
+
``
1345
`+
/* Not found – append to consts list */
`
``
1346
`+
Py_ssize_t index = PyList_GET_SIZE(consts);
`
``
1347
`+
if ((size_t)index >= (size_t)INT_MAX - 1) {
`
``
1348
`+
PyErr_SetString(PyExc_OverflowError, "too many constants");
`
``
1349
`+
Py_DECREF(newconst);
`
``
1350
`+
return -1;
`
``
1351
`+
}
`
``
1352
`+
if (PyList_Append(consts, newconst)) {
`
``
1353
`+
Py_DECREF(newconst);
`
``
1354
`+
return -1;
`
1351
1355
` }
`
``
1356
+
``
1357
`+
/* Update index (must be after successful append) */
`
``
1358
`+
if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
`
``
1359
`+
/* OOM – roll back the append so consts and consts_index stay consistent */
`
``
1360
`+
PyList_SetSlice(consts, index, index + 1, NULL);
`
``
1361
`+
Py_DECREF(newconst);
`
``
1362
`+
PyErr_NoMemory();
`
``
1363
`+
return -1;
`
``
1364
`+
}
`
``
1365
+
1352
1366
`Py_DECREF(newconst);
`
1353
1367
`return (int)index;
`
1354
1368
`}
`
`@@ -1424,7 +1438,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
`
1424
1438
`/* Steals reference to "newconst" */
`
1425
1439
`static int
`
1426
1440
`instr_make_load_const(cfg_instr *instr, PyObject *newconst,
`
1427
``
`-
PyObject *consts, PyObject *const_cache)
`
``
1441
`+
PyObject *consts, PyObject *const_cache,
`
``
1442
`+
_Py_hashtable_t *consts_index)
`
1428
1443
`{
`
1429
1444
`int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
`
1430
1445
`if (res < 0) {
`
`@@ -1434,7 +1449,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
`
1434
1449
`if (res > 0) {
`
1435
1450
`return SUCCESS;
`
1436
1451
` }
`
1437
``
`-
int oparg = add_const(newconst, consts, const_cache);
`
``
1452
`+
int oparg = add_const(newconst, consts, const_cache, consts_index);
`
1438
1453
`RETURN_IF_ERROR(oparg);
`
1439
1454
`INSTR_SET_OP1(instr, LOAD_CONST, oparg);
`
1440
1455
`return SUCCESS;
`
`@@ -1447,7 +1462,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
`
1447
1462
` Called with codestr pointing to the first LOAD_CONST.
`
1448
1463
`*/
`
1449
1464
`static int
`
1450
``
`-
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
`
``
1465
`+
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
`
``
1466
`+
PyObject *const_cache, _Py_hashtable_t *consts_index)
`
1451
1467
`{
`
1452
1468
`/* Pre-conditions */
`
1453
1469
`assert(PyDict_CheckExact(const_cache));
`
`@@ -1484,7 +1500,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
`
1484
1500
` }
`
1485
1501
``
1486
1502
`nop_out(const_instrs, seq_size);
`
1487
``
`-
return instr_make_load_const(instr, const_tuple, consts, const_cache);
`
``
1503
`+
return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
`
1488
1504
`}
`
1489
1505
``
1490
1506
`/* Replace:
`
`@@ -1502,7 +1518,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
`
1502
1518
`*/
`
1503
1519
`static int
`
1504
1520
`fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
`
1505
``
`-
PyObject *consts, PyObject *const_cache)
`
``
1521
`+
PyObject *consts, PyObject *const_cache,
`
``
1522
`+
_Py_hashtable_t *consts_index)
`
1506
1523
`{
`
1507
1524
`assert(PyDict_CheckExact(const_cache));
`
1508
1525
`assert(PyList_CheckExact(consts));
`
`@@ -1554,7 +1571,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
`
1554
1571
`nop_out(&instr, 1);
`
1555
1572
` }
`
1556
1573
`assert(consts_found == 0);
`
1557
``
`-
return instr_make_load_const(intrinsic, newconst, consts, const_cache);
`
``
1574
`+
return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
`
1558
1575
` }
`
1559
1576
``
1560
1577
`if (expect_append) {
`
`@@ -1590,7 +1607,8 @@ Optimize lists and sets for:
`
1590
1607
`*/
`
1591
1608
`static int
`
1592
1609
`optimize_lists_and_sets(basicblock *bb, int i, int nextop,
`
1593
``
`-
PyObject *consts, PyObject *const_cache)
`
``
1610
`+
PyObject *consts, PyObject *const_cache,
`
``
1611
`+
_Py_hashtable_t *consts_index)
`
1594
1612
`{
`
1595
1613
`assert(PyDict_CheckExact(const_cache));
`
1596
1614
`assert(PyList_CheckExact(consts));
`
`@@ -1640,7 +1658,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
`
1640
1658
`Py_SETREF(const_result, frozenset);
`
1641
1659
` }
`
1642
1660
``
1643
``
`-
int index = add_const(const_result, consts, const_cache);
`
``
1661
`+
int index = add_const(const_result, consts, const_cache, consts_index);
`
1644
1662
`RETURN_IF_ERROR(index);
`
1645
1663
`nop_out(const_instrs, seq_size);
`
1646
1664
``
`@@ -1837,7 +1855,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
`
1837
1855
`}
`
1838
1856
``
1839
1857
`static int
`
1840
``
`-
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
`
``
1858
`+
fold_const_binop(basicblock *bb, int i, PyObject *consts,
`
``
1859
`+
PyObject *const_cache, _Py_hashtable_t *consts_index)
`
1841
1860
`{
`
1842
1861
`#define BINOP_OPERAND_COUNT 2
`
1843
1862
`assert(PyDict_CheckExact(const_cache));
`
`@@ -1879,7 +1898,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
`
1879
1898
` }
`
1880
1899
``
1881
1900
`nop_out(operands_instrs, BINOP_OPERAND_COUNT);
`
1882
``
`-
return instr_make_load_const(binop, newconst, consts, const_cache);
`
``
1901
`+
return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
`
1883
1902
`}
`
1884
1903
``
1885
1904
`static PyObject *
`
`@@ -1925,7 +1944,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)
`
1925
1944
`}
`
1926
1945
``
1927
1946
`static int
`
1928
``
`-
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
`
``
1947
`+
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
`
``
1948
`+
PyObject *const_cache, _Py_hashtable_t *consts_index)
`
1929
1949
`{
`
1930
1950
`#define UNARYOP_OPERAND_COUNT 1
`
1931
1951
`assert(PyDict_CheckExact(const_cache));
`
`@@ -1962,7 +1982,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach
`
1962
1982
`assert(PyBool_Check(newconst));
`
1963
1983
` }
`
1964
1984
`nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
`
1965
``
`-
return instr_make_load_const(unaryop, newconst, consts, const_cache);
`
``
1985
`+
return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
`
1966
1986
`}
`
1967
1987
``
1968
1988
`#define VISITED (-1)
`
`@@ -2157,7 +2177,8 @@ apply_static_swaps(basicblock *block, int i)
`
2157
2177
`}
`
2158
2178
``
2159
2179
`static int
`
2160
``
`-
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
`
``
2180
`+
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
`
``
2181
`+
PyObject *consts, _Py_hashtable_t *consts_index)
`
2161
2182
`{
`
2162
2183
`assert(PyDict_CheckExact(const_cache));
`
2163
2184
`assert(PyList_CheckExact(consts));
`
`@@ -2272,7 +2293,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
`
2272
2293
`return ERROR;
`
2273
2294
` }
`
2274
2295
`cnt = PyBool_FromLong(is_true);
`
2275
``
`-
int index = add_const(cnt, consts, const_cache);
`
``
2296
`+
int index = add_const(cnt, consts, const_cache, consts_index);
`
2276
2297
`if (index < 0) {
`
2277
2298
`return ERROR;
`
2278
2299
` }
`
`@@ -2286,15 +2307,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
`
2286
2307
`}
`
2287
2308
``
2288
2309
`static int
`
2289
``
`-
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
`
``
2310
`+
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
`
``
2311
`+
_Py_hashtable_t *consts_index) {
`
2290
2312
`for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
`
2291
``
`-
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
`
``
2313
`+
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
`
2292
2314
` }
`
2293
2315
`return SUCCESS;
`
2294
2316
`}
`
2295
2317
``
2296
2318
`static int
`
2297
``
`-
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
`
``
2319
`+
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
`
``
2320
`+
_Py_hashtable_t *consts_index)
`
2298
2321
`{
`
2299
2322
`assert(PyDict_CheckExact(const_cache));
`
2300
2323
`assert(PyList_CheckExact(consts));
`
`@@ -2334,11 +2357,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
`
2334
2357
`continue;
`
2335
2358
` }
`
2336
2359
` }
`
2337
``
`-
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
`
``
2360
`+
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
`
2338
2361
`break;
`
2339
2362
`case BUILD_LIST:
`
2340
2363
`case BUILD_SET:
`
2341
``
`-
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
`
``
2364
`+
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
`
2342
2365
`break;
`
2343
2366
`case POP_JUMP_IF_NOT_NONE:
`
2344
2367
`case POP_JUMP_IF_NONE:
`
`@@ -2473,23 +2496,23 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
`
2473
2496
`_Py_FALLTHROUGH;
`
2474
2497
`case UNARY_INVERT:
`
2475
2498
`case UNARY_NEGATIVE:
`
2476
``
`-
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
`
``
2499
`+
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
`
2477
2500
`break;
`
2478
2501
`case CALL_INTRINSIC_1:
`
2479
2502
`if (oparg == INTRINSIC_LIST_TO_TUPLE) {
`
2480
2503
`if (nextop == GET_ITER) {
`
2481
2504
`INSTR_SET_OP0(inst, NOP);
`
2482
2505
` }
`
2483
2506
`else {
`
2484
``
`-
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
`
``
2507
`+
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
`
2485
2508
` }
`
2486
2509
` }
`
2487
2510
`else if (oparg == INTRINSIC_UNARY_POSITIVE) {
`
2488
``
`-
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
`
``
2511
`+
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
`
2489
2512
` }
`
2490
2513
`break;
`
2491
2514
`case BINARY_OP:
`
2492
``
`-
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
`
``
2515
`+
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
`
2493
2516
`break;
`
2494
2517
` }
`
2495
2518
` }
`
`@@ -2534,16 +2557,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
`
2534
2557
` NOPs. Later those NOPs are removed.
`
2535
2558
`*/
`
2536
2559
`static int
`
2537
``
`-
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
`
``
2560
`+
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
`
``
2561
`+
_Py_hashtable_t *consts_index, int firstlineno)
`
2538
2562
`{
`
2539
2563
`assert(PyDict_CheckExact(const_cache));
`
2540
2564
`RETURN_IF_ERROR(check_cfg(g));
`
2541
2565
`RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
`
2542
2566
`RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
`
2543
2567
`RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
`
2544
``
`-
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
`
``
2568
`+
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
`
2545
2569
`for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
`
2546
``
`-
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
`
``
2570
`+
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
`
2547
2571
` }
`
2548
2572
`RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
`
2549
2573
`RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
`
`@@ -3655,7 +3679,36 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
`
3655
3679
`RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
`
3656
3680
``
3657
3681
`/** Optimization **/
`
3658
``
`-
RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
`
``
3682
+
``
3683
`+
/* Auxiliary pointer→index hashtable for O(1) lookup in add_const. */
`
``
3684
`+
_Py_hashtable_t *consts_index = _Py_hashtable_new(
`
``
3685
`+
_Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
`
``
3686
`+
if (consts_index == NULL) {
`
``
3687
`+
PyErr_NoMemory();
`
``
3688
`+
return ERROR;
`
``
3689
`+
}
`
``
3690
+
``
3691
`+
/* Seed the index with pre-existing constants. */
`
``
3692
`+
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
`
``
3693
`+
PyObject *item = PyList_GET_ITEM(consts, i);
`
``
3694
`+
if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
`
``
3695
`+
continue; /* duplicate pointer; keep first occurrence */
`
``
3696
`+
}
`
``
3697
`+
if (_Py_hashtable_set(consts_index, (void *)item,
`
``
3698
`+
(void *)(uintptr_t)i) < 0) {
`
``
3699
`+
_Py_hashtable_destroy(consts_index);
`
``
3700
`+
PyErr_NoMemory();
`
``
3701
`+
return ERROR;
`
``
3702
`+
}
`
``
3703
`+
}
`
``
3704
+
``
3705
`+
int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
`
``
3706
+
``
3707
`+
/* Destroy the index now: it is no longer needed, and any later modification of the consts list would leave its stored indices stale. */
`
``
3708
`+
_Py_hashtable_destroy(consts_index);
`
``
3709
+
``
3710
`+
RETURN_IF_ERROR(ret);
`
``
3711
+
3659
3712
`RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
`
3660
3713
`RETURN_IF_ERROR(
`
3661
3714
`add_checks_for_loads_of_uninitialized_variables(
`