Fix O(N²) in add_const() after constant folding moved to CFG · zSirius/cpython@120e518

`@@ -6,6 +6,7 @@

`

6

6

`#include "pycore_intrinsics.h"

`

7

7

`#include "pycore_pymem.h" // _PyMem_IsPtrFreed()

`

8

8

`#include "pycore_long.h" // _PY_IS_SMALL_INT()

`

``

9

`+

#include "pycore_hashtable.h" // _Py_hashtable_t

`

9

10

``

10

11

`#include "pycore_opcode_utils.h"

`

11

12

`#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc

`

`@@ -1325,30 +1326,43 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)

`

1325

1326

``

1326

1327

`// Steals a reference to newconst.

`

1327

1328

`static int

`

1328

``

`-

add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)

`

``

1329

`+

add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,

`

``

1330

`+

_Py_hashtable_t *consts_index)

`

1329

1331

`{

`

1330

1332

`if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {

`

1331

1333

`Py_DECREF(newconst);

`

1332

1334

`return -1;

`

1333

1335

` }

`

1334

1336

``

1335

``

`-

Py_ssize_t index;

`

1336

``

`-

for (index = 0; index < PyList_GET_SIZE(consts); index++) {

`

1337

``

`-

if (PyList_GET_ITEM(consts, index) == newconst) {

`

1338

``

`-

break;

`

1339

``

`-

}

`

``

1337

`+

/* O(1) lookup via pointer-keyed hashtable (replaces linear search). */

`

``

1338

`+

_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);

`

``

1339

`+

if (entry != NULL) {

`

``

1340

`+

/* Already exists */

`

``

1341

`+

Py_DECREF(newconst);

`

``

1342

`+

return (int)(uintptr_t)entry->value;

`

1340

1343

` }

`

1341

``

`-

if (index == PyList_GET_SIZE(consts)) {

`

1342

``

`-

if ((size_t)index >= (size_t)INT_MAX - 1) {

`

1343

``

`-

PyErr_SetString(PyExc_OverflowError, "too many constants");

`

1344

``

`-

Py_DECREF(newconst);

`

1345

``

`-

return -1;

`

1346

``

`-

}

`

1347

``

`-

if (PyList_Append(consts, newconst)) {

`

1348

``

`-

Py_DECREF(newconst);

`

1349

``

`-

return -1;

`

1350

``

`-

}

`

``

1344

+

``

1345

`+

/* Not found – append to consts list */

`

``

1346

`+

Py_ssize_t index = PyList_GET_SIZE(consts);

`

``

1347

`+

if ((size_t)index >= (size_t)INT_MAX - 1) {

`

``

1348

`+

PyErr_SetString(PyExc_OverflowError, "too many constants");

`

``

1349

`+

Py_DECREF(newconst);

`

``

1350

`+

return -1;

`

``

1351

`+

}

`

``

1352

`+

if (PyList_Append(consts, newconst)) {

`

``

1353

`+

Py_DECREF(newconst);

`

``

1354

`+

return -1;

`

1351

1355

` }

`

``

1356

+

``

1357

`+

/* Update index (must be after successful append) */

`

``

1358

`+

if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {

`

``

1359

`+

/* OOM – rollback append for consistency */

`

``

1360

`+

PyList_SetSlice(consts, index, index + 1, NULL);

`

``

1361

`+

Py_DECREF(newconst);

`

``

1362

`+

PyErr_NoMemory();

`

``

1363

`+

return -1;

`

``

1364

`+

}

`

``

1365

+

1352

1366

`Py_DECREF(newconst);

`

1353

1367

`return (int)index;

`

1354

1368

`}

`

`@@ -1424,7 +1438,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,

`

1424

1438

`/* Steals reference to "newconst" */

`

1425

1439

`static int

`

1426

1440

`instr_make_load_const(cfg_instr *instr, PyObject *newconst,

`

1427

``

`-

PyObject *consts, PyObject *const_cache)

`

``

1441

`+

PyObject *consts, PyObject *const_cache,

`

``

1442

`+

_Py_hashtable_t *consts_index)

`

1428

1443

`{

`

1429

1444

`int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);

`

1430

1445

`if (res < 0) {

`

`@@ -1434,7 +1449,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,

`

1434

1449

`if (res > 0) {

`

1435

1450

`return SUCCESS;

`

1436

1451

` }

`

1437

``

`-

int oparg = add_const(newconst, consts, const_cache);

`

``

1452

`+

int oparg = add_const(newconst, consts, const_cache, consts_index);

`

1438

1453

`RETURN_IF_ERROR(oparg);

`

1439

1454

`INSTR_SET_OP1(instr, LOAD_CONST, oparg);

`

1440

1455

`return SUCCESS;

`

`@@ -1447,7 +1462,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,

`

1447

1462

` Called with codestr pointing to the first LOAD_CONST.

`

1448

1463

`*/

`

1449

1464

`static int

`

1450

``

`-

fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)

`

``

1465

`+

fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,

`

``

1466

`+

PyObject *const_cache, _Py_hashtable_t *consts_index)

`

1451

1467

`{

`

1452

1468

`/* Pre-conditions */

`

1453

1469

`assert(PyDict_CheckExact(const_cache));

`

`@@ -1484,7 +1500,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const

`

1484

1500

` }

`

1485

1501

``

1486

1502

`nop_out(const_instrs, seq_size);

`

1487

``

`-

return instr_make_load_const(instr, const_tuple, consts, const_cache);

`

``

1503

`+

return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);

`

1488

1504

`}

`

1489

1505

``

1490

1506

`/* Replace:

`

`@@ -1502,7 +1518,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const

`

1502

1518

`*/

`

1503

1519

`static int

`

1504

1520

`fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,

`

1505

``

`-

PyObject *consts, PyObject *const_cache)

`

``

1521

`+

PyObject *consts, PyObject *const_cache,

`

``

1522

`+

_Py_hashtable_t *consts_index)

`

1506

1523

`{

`

1507

1524

`assert(PyDict_CheckExact(const_cache));

`

1508

1525

`assert(PyList_CheckExact(consts));

`

`@@ -1554,7 +1571,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,

`

1554

1571

`nop_out(&instr, 1);

`

1555

1572

` }

`

1556

1573

`assert(consts_found == 0);

`

1557

``

`-

return instr_make_load_const(intrinsic, newconst, consts, const_cache);

`

``

1574

`+

return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);

`

1558

1575

` }

`

1559

1576

``

1560

1577

`if (expect_append) {

`

`@@ -1590,7 +1607,8 @@ Optimize lists and sets for:

`

1590

1607

`*/

`

1591

1608

`static int

`

1592

1609

`optimize_lists_and_sets(basicblock *bb, int i, int nextop,

`

1593

``

`-

PyObject *consts, PyObject *const_cache)

`

``

1610

`+

PyObject *consts, PyObject *const_cache,

`

``

1611

`+

_Py_hashtable_t *consts_index)

`

1594

1612

`{

`

1595

1613

`assert(PyDict_CheckExact(const_cache));

`

1596

1614

`assert(PyList_CheckExact(consts));

`

`@@ -1640,7 +1658,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,

`

1640

1658

`Py_SETREF(const_result, frozenset);

`

1641

1659

` }

`

1642

1660

``

1643

``

`-

int index = add_const(const_result, consts, const_cache);

`

``

1661

`+

int index = add_const(const_result, consts, const_cache, consts_index);

`

1644

1662

`RETURN_IF_ERROR(index);

`

1645

1663

`nop_out(const_instrs, seq_size);

`

1646

1664

``

`@@ -1837,7 +1855,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)

`

1837

1855

`}

`

1838

1856

``

1839

1857

`static int

`

1840

``

`-

fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)

`

``

1858

`+

fold_const_binop(basicblock *bb, int i, PyObject *consts,

`

``

1859

`+

PyObject *const_cache, _Py_hashtable_t *consts_index)

`

1841

1860

`{

`

1842

1861

`#define BINOP_OPERAND_COUNT 2

`

1843

1862

`assert(PyDict_CheckExact(const_cache));

`

`@@ -1879,7 +1898,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)

`

1879

1898

` }

`

1880

1899

``

1881

1900

`nop_out(operands_instrs, BINOP_OPERAND_COUNT);

`

1882

``

`-

return instr_make_load_const(binop, newconst, consts, const_cache);

`

``

1901

`+

return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);

`

1883

1902

`}

`

1884

1903

``

1885

1904

`static PyObject *

`

`@@ -1925,7 +1944,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)

`

1925

1944

`}

`

1926

1945

``

1927

1946

`static int

`

1928

``

`-

fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)

`

``

1947

`+

fold_const_unaryop(basicblock *bb, int i, PyObject *consts,

`

``

1948

`+

PyObject *const_cache, _Py_hashtable_t *consts_index)

`

1929

1949

`{

`

1930

1950

`#define UNARYOP_OPERAND_COUNT 1

`

1931

1951

`assert(PyDict_CheckExact(const_cache));

`

`@@ -1962,7 +1982,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach

`

1962

1982

`assert(PyBool_Check(newconst));

`

1963

1983

` }

`

1964

1984

`nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);

`

1965

``

`-

return instr_make_load_const(unaryop, newconst, consts, const_cache);

`

``

1985

`+

return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);

`

1966

1986

`}

`

1967

1987

``

1968

1988

`#define VISITED (-1)

`

`@@ -2157,7 +2177,8 @@ apply_static_swaps(basicblock *block, int i)

`

2157

2177

`}

`

2158

2178

``

2159

2179

`static int

`

2160

``

`-

basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)

`

``

2180

`+

basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,

`

``

2181

`+

PyObject *consts, _Py_hashtable_t *consts_index)

`

2161

2182

`{

`

2162

2183

`assert(PyDict_CheckExact(const_cache));

`

2163

2184

`assert(PyList_CheckExact(consts));

`

`@@ -2272,7 +2293,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *

`

2272

2293

`return ERROR;

`

2273

2294

` }

`

2274

2295

`cnt = PyBool_FromLong(is_true);

`

2275

``

`-

int index = add_const(cnt, consts, const_cache);

`

``

2296

`+

int index = add_const(cnt, consts, const_cache, consts_index);

`

2276

2297

`if (index < 0) {

`

2277

2298

`return ERROR;

`

2278

2299

` }

`

`@@ -2286,15 +2307,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *

`

2286

2307

`}

`

2287

2308

``

2288

2309

`static int

`

2289

``

`-

optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {

`

``

2310

`+

optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,

`

``

2311

`+

_Py_hashtable_t *consts_index) {

`

2290

2312

`for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {

`

2291

``

`-

RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));

`

``

2313

`+

RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));

`

2292

2314

` }

`

2293

2315

`return SUCCESS;

`

2294

2316

`}

`

2295

2317

``

2296

2318

`static int

`

2297

``

`-

optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)

`

``

2319

`+

optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,

`

``

2320

`+

_Py_hashtable_t *consts_index)

`

2298

2321

`{

`

2299

2322

`assert(PyDict_CheckExact(const_cache));

`

2300

2323

`assert(PyList_CheckExact(consts));

`

`@@ -2334,11 +2357,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)

`

2334

2357

`continue;

`

2335

2358

` }

`

2336

2359

` }

`

2337

``

`-

RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));

`

``

2360

`+

RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));

`

2338

2361

`break;

`

2339

2362

`case BUILD_LIST:

`

2340

2363

`case BUILD_SET:

`

2341

``

`-

RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));

`

``

2364

`+

RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));

`

2342

2365

`break;

`

2343

2366

`case POP_JUMP_IF_NOT_NONE:

`

2344

2367

`case POP_JUMP_IF_NONE:

`

`@@ -2473,23 +2496,23 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)

`

2473

2496

`_Py_FALLTHROUGH;

`

2474

2497

`case UNARY_INVERT:

`

2475

2498

`case UNARY_NEGATIVE:

`

2476

``

`-

RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));

`

``

2499

`+

RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));

`

2477

2500

`break;

`

2478

2501

`case CALL_INTRINSIC_1:

`

2479

2502

`if (oparg == INTRINSIC_LIST_TO_TUPLE) {

`

2480

2503

`if (nextop == GET_ITER) {

`

2481

2504

`INSTR_SET_OP0(inst, NOP);

`

2482

2505

` }

`

2483

2506

`else {

`

2484

``

`-

RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));

`

``

2507

`+

RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));

`

2485

2508

` }

`

2486

2509

` }

`

2487

2510

`else if (oparg == INTRINSIC_UNARY_POSITIVE) {

`

2488

``

`-

RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));

`

``

2511

`+

RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));

`

2489

2512

` }

`

2490

2513

`break;

`

2491

2514

`case BINARY_OP:

`

2492

``

`-

RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));

`

``

2515

`+

RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));

`

2493

2516

`break;

`

2494

2517

` }

`

2495

2518

` }

`

`@@ -2534,16 +2557,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)

`

2534

2557

` NOPs. Later those NOPs are removed.

`

2535

2558

`*/

`

2536

2559

`static int

`

2537

``

`-

optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)

`

``

2560

`+

optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,

`

``

2561

`+

_Py_hashtable_t *consts_index, int firstlineno)

`

2538

2562

`{

`

2539

2563

`assert(PyDict_CheckExact(const_cache));

`

2540

2564

`RETURN_IF_ERROR(check_cfg(g));

`

2541

2565

`RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));

`

2542

2566

`RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));

`

2543

2567

`RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));

`

2544

``

`-

RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));

`

``

2568

`+

RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));

`

2545

2569

`for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {

`

2546

``

`-

RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));

`

``

2570

`+

RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));

`

2547

2571

` }

`

2548

2572

`RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));

`

2549

2573

`RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));

`

`@@ -3655,7 +3679,36 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,

`

3655

3679

`RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));

`

3656

3680

``

3657

3681

`/** Optimization **/

`

3658

``

`-

RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));

`

``

3682

+

``

3683

`+

/* Auxiliary pointer→index hashtable for O(1) lookup in add_const. */

`

``

3684

`+

_Py_hashtable_t *consts_index = _Py_hashtable_new(

`

``

3685

`+

_Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);

`

``

3686

`+

if (consts_index == NULL) {

`

``

3687

`+

PyErr_NoMemory();

`

``

3688

`+

return ERROR;

`

``

3689

`+

}

`

``

3690

+

``

3691

`+

/* Seed the index with pre-existing constants. */

`

``

3692

`+

for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {

`

``

3693

`+

PyObject *item = PyList_GET_ITEM(consts, i);

`

``

3694

`+

if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {

`

``

3695

`+

continue; /* duplicate pointer; keep first occurrence */

`

``

3696

`+

}

`

``

3697

`+

if (_Py_hashtable_set(consts_index, (void *)item,

`

``

3698

`+

(void *)(uintptr_t)i) < 0) {

`

``

3699

`+

_Py_hashtable_destroy(consts_index);

`

``

3700

`+

PyErr_NoMemory();

`

``

3701

`+

return ERROR;

`

``

3702

`+

}

`

``

3703

`+

}

`

``

3704

+

``

3705

`+

int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);

`

``

3706

+

``

3707

`+

/* consts_index is invalid after this (consts list may be modified). */

`

``

3708

`+

_Py_hashtable_destroy(consts_index);

`

``

3709

+

``

3710

`+

RETURN_IF_ERROR(ret);

`

``

3711

+

3659

3712

`RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));

`

3660

3713

`RETURN_IF_ERROR(

`

3661

3714

`add_checks_for_loads_of_uninitialized_variables(

`