LLVM: lib/CodeGen/MachineScheduler.cpp Source File

1//===- MachineScheduler.cpp - Machine Instruction Scheduler --------------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9// MachineScheduler schedules machine instructions after phi elimination. It

10// preserves LiveIntervals so it can be invoked before register allocation.

11//

12//===----------------------------------------------------------------------===//

13

51#include "llvm/Config/llvm-config.h"

61#include

62#include

63#include

64#include

65#include

66#include

67#include

68#include

69#include

70#include

71

72using namespace llvm;

73

74#define DEBUG_TYPE "machine-scheduler"

75

76STATISTIC(NumClustered, "Number of load/store pairs clustered");

77

78namespace llvm {

79

81 "misched-prera-direction", cl::Hidden,

82 cl::desc("Pre reg-alloc list scheduling direction"),

86 "Force top-down pre reg-alloc list scheduling"),

88 "Force bottom-up pre reg-alloc list scheduling"),

90 "Force bidirectional pre reg-alloc list scheduling")));

91

93 "misched-postra-direction", cl::Hidden,

94 cl::desc("Post reg-alloc list scheduling direction"),

98 "Force top-down post reg-alloc list scheduling"),

100 "Force bottom-up post reg-alloc list scheduling"),

102 "Force bidirectional post reg-alloc list scheduling")));

103

106 cl::desc("Print critical path length to stdout"));

107

110 cl::desc("Verify machine instrs before and after machine scheduling"));

111

112#ifndef NDEBUG

115 cl::desc("Pop up a window to show MISched dags after they are processed"));

117 cl::desc("Print schedule DAGs"));

120 cl::desc("Dump resource usage at schedule boundary."));

 123 cl::desc("Show details of invoking getNextResourceCycle."));

124#else

128#ifdef LLVM_ENABLE_DUMP

130#endif

131#endif

132

133}

134

135#ifndef NDEBUG

136

137

 139 cl::desc("Hide nodes with more predecessors/successors than cutoff"));

140

142 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

143

145 cl::desc("Only schedule this function"));

147 cl::desc("Only schedule this MBB#"));

148#endif

149

150

151

153 cl::desc("Limit ready list to N instructions"), cl::init(256));

154

156 cl::desc("Enable register pressure scheduling."), cl::init(true));

157

159 cl::desc("Enable cyclic critical path analysis."), cl::init(true));

160

162 cl::desc("Enable memop clustering."),

 166 cl::desc("Switch to fast cluster algorithm with the loss "

 167 "of some fusion opportunities"),

171 cl::desc("The threshold for fast cluster"),

173

174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

177 cl::desc("Dump resource usage at schedule boundary."));

180 cl::desc("Set width of the columns with "

181 "the resources and schedule units"),

185 cl::desc("Set width of the columns showing resource booking."),

189 cl::desc("Sort the resources printed in the dump trace"));

190#endif

191

195

196

198

199

200void MachineSchedStrategy::anchor() {}

201

202void ScheduleDAGMutation::anchor() {}

203

204

205

206

207

210}

211

214}

215

216namespace {

217

218

221public:

223

225

226protected:

228};

229

230

231class MachineScheduler : public MachineSchedulerBase {

232public:

233 MachineScheduler();

234

235 void getAnalysisUsage(AnalysisUsage &AU) const override;

236

238

239 static char ID;

240

241protected:

243};

244

245

246class PostMachineScheduler : public MachineSchedulerBase {

247public:

248 PostMachineScheduler();

249

250 void getAnalysisUsage(AnalysisUsage &AU) const override;

251

253

254 static char ID;

255

256protected:

258};

259

260}

261

262char MachineScheduler::ID = 0;

263

265

267 "Machine Instruction Scheduler", false, false)

275

276MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {

278}

279

280void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

291}

292

293char PostMachineScheduler::ID = 0;

294

296

298 "PostRA Machine Instruction Scheduler", false, false)

304

305PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {

307}

308

309void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

316}

317

320

321

322

324 return nullptr;

325}

326

327

332 cl::desc("Machine instruction scheduler to use"));

333

337

339 "enable-misched",

340 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),

342

344 "enable-post-misched",

345 cl::desc("Enable the post-ra machine instruction scheduling pass."),

347

348

352 assert(I != Beg && "reached the top of the region, cannot decrement");

353 while (--I != Beg) {

 354 if (!I->isDebugOrPseudoInstr())

355 break;

356 }

357 return I;

358}

359

360

366}

367

368

369

 373 for (; I != End; ++I) {

 374 if (!I->isDebugOrPseudoInstr())

 375 break;

376 }

377 return I;

378}
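// Worked example (illustrative, not part of the original source): given a
// region [I0, DBG_VALUE, I1], nextIfDebug skips the DBG_VALUE and yields I1,
// while priorNonDebug walks backward past any trailing debug/pseudo
// instructions toward the region begin. Both helpers keep debug instructions
// from perturbing region boundaries and scheduling decisions.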

379

380

386}

387

388

390

393 return Ctor(this);

394

395

399

400

402}

403

404

405

406

407ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {

408

412

413

415}

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {

435 return false;

436

439 return false;

441 return false;

442

444

445

446 MF = &mf;

 447 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();

 448 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

 449 PassConfig = &getAnalysis<TargetPassConfig>();

 450 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

 451

 452 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();

453

456 MF->verify(this, "Before machine scheduling.", &errs());

457 }

458 RegClassInfo->runOnMachineFunction(*MF);

459

460

461

 462 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());

463 scheduleRegions(*Scheduler, false);

464

467 MF->verify(this, "After machine scheduling.", &errs());

468 return true;

469}

470

471bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {

473 return false;

474

477 return false;

479 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");

480 return false;

481 }

483

484

485 MF = &mf;

 486 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();

 487 PassConfig = &getAnalysis<TargetPassConfig>();

 488 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

489

491 MF->verify(this, "Before post machine scheduling.", &errs());

492

493

494

 495 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());

496 scheduleRegions(*Scheduler, true);

497

499 MF->verify(this, "After post machine scheduling.", &errs());

500 return true;

501}

502

503

504

505

506

507

508

509

510

511

512

518 MI->isFakeUse();

519}

520

521

522namespace {

523struct SchedRegion {

524

525

526

527

528

531 unsigned NumRegionInstrs;

532

534 unsigned N) :

535 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}

536};

537}

538

540

541static void

544 bool RegionsTopDown) {

547

550 RegionEnd != MBB->begin(); RegionEnd = I) {

551

552

553 if (RegionEnd != MBB->end() ||

555 --RegionEnd;

556 }

557

558

559

560 unsigned NumRegionInstrs = 0;

561 I = RegionEnd;

565 break;

566 if (MI.isDebugOrPseudoInstr()) {

567

568

569 ++NumRegionInstrs;

570 }

571 }

572

573

574

575 if (NumRegionInstrs != 0)

576 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));

577 }

578

579 if (RegionsTopDown)

580 std::reverse(Regions.begin(), Regions.end());

581}
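// Illustrative sketch of the region split above (assumed example, not in the
// source): a block "load; add; call; sub; ret" yields two regions, [load, add]
// and [sub], because the call and the terminator act as scheduling boundaries
// and boundary instructions themselves are never scheduled. With
// RegionsTopDown the vector is reversed so regions are visited in top-down
// block order.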

582

583

585 bool FixKillFlags) {

586

587

588

589

591 MBB != MBBEnd; ++MBB) {

592

594

595#ifndef NDEBUG

597 continue;

600 continue;

601#endif

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

619 for (const SchedRegion &R : MBBRegions) {

622 unsigned NumRegionInstrs = R.NumRegionInstrs;

623

624

625

626 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

627

628

629 if (I == RegionEnd || I == std::prev(RegionEnd)) {

630

631

633 continue;

634 }

635 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");

637 << " " << MBB->getName() << "\n From: " << *I

638 << " To: ";

639 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;

640 else dbgs() << "End\n";

641 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

643 errs() << MF->getName();

646 }

647

648

649

651

652

654 }

656

657

658

659 if (FixKillFlags)

661 }

663}

664

665void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {

666

667}

668

669#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

671 dbgs() << "Queue " << Name << ": ";

672 for (const SUnit *SU : Queue)

673 dbgs() << SU->NodeNum << " ";

674 dbgs() << "\n";

675}

676#endif

677

678

679

680

681

682

683

684

686

687

688

689

690

693

694 if (SuccEdge->isWeak()) {

698 return;

699 }

700#ifndef NDEBUG

702 dbgs() << "*** Scheduling failed! ***\n";

704 dbgs() << " has been released too many times!\n";

706 }

707#endif

708

709

712

715 SchedImpl->releaseTopNode(SuccSU);

716}

717

718

722}

723

724

725

726

727

730

731 if (PredEdge->isWeak()) {

735 return;

736 }

737#ifndef NDEBUG

739 dbgs() << "*** Scheduling failed! ***\n";

741 dbgs() << " has been released too many times!\n";

743 }

744#endif

745

746

749

752 SchedImpl->releaseBottomNode(PredSU);

753}

754

755

759}

760

764}

765

769}

770

771

772

773

774

778 unsigned regioninstrs)

779{

781

783

784

786 if (SchedImpl->getPolicy().OnlyTopDown)

788 else if (SchedImpl->getPolicy().OnlyBottomUp)

790 else

793}

794

795

796

799

802

803

805

806

809

810

813}

814

816#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)

819 return false;

820 }

821 ++NumInstrsScheduled;

822#endif

823 return true;

824}

825

826

827

828

829

831 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");

833

834

836

838

841

845

846

847

849

850

852

853 bool IsTopNode = false;

854 while (true) {

855 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");

857 if (!SU) break;

858

861 break;

862

864 if (IsTopNode) {

865 assert(SU->isTopReady() && "node still has unscheduled dependencies");

868 else

870 } else {

871 assert(SU->isBottomReady() && "node still has unscheduled dependencies");

874 if (&*priorII == MI)

876 else {

881 }

882 }

883

884

885

886

887 SchedImpl->schedNode(SU, IsTopNode);

888

890 }

892

894

896 dbgs() << "*** Final schedule for "

899 dbgs() << '\n';

900 });

901}

902

903

906 m->apply(this);

907}

908

913 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

914

915

916 SU.biasCriticalPath();

917

918

919 if (!SU.NumPredsLeft)

921

922 if (!SU.NumSuccsLeft)

924 }

926}

927

928

933

934

935

936

937

938 for (SUnit *SU : TopRoots)

940

941

942

944 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {

946 }

947

950

952

953

956}

957

958

960

961 if (IsTopNode)

963 else

965

967}

968

969

971

975 }

976

977 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator

979 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);

987 }

988}

989

990#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

992

994

996 return;

997

998

1000 return;

1001

1002 dbgs() << " * Schedule table (TopDown):\n";

1008 if (!SU)

1009 continue;

1013 PI != PE; ++PI) {

1014 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)

1015 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;

1016 }

1017 }

1018

1020 for (unsigned C = FirstCycle; C <= LastCycle; ++C)

1022 dbgs() << "|\n";

1023

1026 if (!SU) {

1027 dbgs() << "Missing SUnit\n";

1028 continue;

1029 }

1030 std::string NodeName("SU(");

1031 NodeName += std::to_string(SU->NodeNum) + ")";

1033 unsigned C = FirstCycle;

1034 for (; C <= LastCycle; ++C) {

1037 else

1039 }

1040 dbgs() << "|\n";

1042

1046

1051 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||

1052 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&

1053 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);

1054 });

1056 C = FirstCycle;

1057 const std::string ResName =

1060 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {

1062 }

1063 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;

1064 ++I, ++C)

1066 while (C++ <= LastCycle)

1068

1069 dbgs() << "| \n";

1070 }

1071 }

1072}

1073

1075

1077 return;

1078

1079

1081 return;

1082

1083 dbgs() << " * Schedule table (BottomUp):\n";

1085

1090 if (!SU)

1091 continue;

1095 PI != PE; ++PI) {

1096 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)

1097 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;

1098 }

1099 }

1100

1102 for (int C = FirstCycle; C >= LastCycle; --C)

1104 dbgs() << "|\n";

1105

1108 if (!SU) {

1109 dbgs() << "Missing SUnit\n";

1110 continue;

1111 }

1112 std::string NodeName("SU(");

1113 NodeName += std::to_string(SU->NodeNum) + ")";

1115 int C = FirstCycle;

1116 for (; C >= LastCycle; --C) {

1119 else

1121 }

1122 dbgs() << "|\n";

1127

1132 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||

1133 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&

1134 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);

1135 });

1137 C = FirstCycle;

1138 const std::string ResName =

1141 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {

1143 }

1144 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;

1145 ++I, --C)

1147 while (C-- >= LastCycle)

1149

1150 dbgs() << "| \n";

1151 }

1152 }

1153}

1154#endif

1155

1156#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

1164 dbgs() << "* Schedule table (Bidirectional): not implemented\n";

1165 } else {

1166 dbgs() << "* Schedule table: DumpDirection not set.\n";

1167 }

1168 }

1169

1173 else

1174 dbgs() << "Missing SUnit\n";

1175 }

1176}

1177#endif

1178

1179

1180

1181

1182

1183

1186}

1187

1191 if (!MO.isReg())

1192 continue;

1193 if (!MO.readsReg())

1194 continue;

1196 continue;

1197

1199 if (!Reg.isVirtual())

1200 continue;

1201

1202

1204 bool FoundDef = false;

1206 if (MO2.getReg() == Reg && !MO2.isDead()) {

1207 FoundDef = true;

1208 break;

1209 }

1210 }

1211 if (FoundDef)

1212 continue;

1213 }

1214

1215

1218 if (UI->SU == &SU)

1219 break;

1220 }

1223 }

1224}

1225

1226

1227

1228

1229

1233 unsigned regioninstrs)

1234{

1235

1237

1238

1240

1242

1245

1247 "ShouldTrackLaneMasks requires ShouldTrackPressure");

1248}

1249

1250

1251

1257

1262

1263

1265

1267

1268

1271

1272

1273

1274

1277

1283 };

1284

1285

1286

1288

1289

1294 }

1295

1298 dbgs() << "Bottom Pressure:\n";

1300

1304 "Can't find the region bottom");

1305

1306

1307

1311 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {

1317 }

1318 }

1323 dbgs() << "\n");

1324}

1325

1328 const std::vector &NewMaxPressure) {

1332 if (!PC.isValid())

1333 break;

1334 unsigned ID = PC.getPSet();

1336 ++CritIdx;

1339 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())

1341 }

1343 if (NewMaxPressure[ID] >= Limit - 2) {

1345 << NewMaxPressure[ID]

1346 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")

1348 << " livethru)\n");

1349 }

1350 }

1351}

1352

1353

1354

1358

1359 if (!Reg.isVirtual())

1360 continue;

1361

1363

1364

1365

1366

1367 bool Decrement = P.LaneMask.any();

1368

1371 SUnit &SU = *V2SU.SU;

1373 continue;

1374

1381 }

1382 } else {

1383 assert(P.LaneMask.any());

1385

1386

1387

1388

1395 else {

1398 }

1399

1400 assert(VNI && "No live value at use.");

1403 SUnit *SU = V2SU.SU;

1404

1405

1409 if (LRQ.valueIn() == VNI) {

1415 }

1416 }

1417 }

1418 }

1419 }

1420}

1421

1423#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

1429 dbgs() << " Pressure Diff : ";

1431 }

1432 dbgs() << " Single Issue : ";

1435 dbgs() << "true;";

1436 else

1437 dbgs() << "false;";

1438 dbgs() << '\n';

1439 }

1442#endif

1443}

1444

1445

1446

1447

1448

1449

1450

1451

1452

1453

1454

1456 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");

1459

1461

1464

1465

1466

1468

1472

1473

1475

1476 bool IsTopNode = false;

1477 while (true) {

1478 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");

1480 if (!SU) break;

1481

1484 break;

1485

1487

1493 SchedImpl->scheduleTree(SubtreeID);

1494 }

1495 }

1496

1497

1498 SchedImpl->schedNode(SU, IsTopNode);

1499

1501 }

1503

1505

1507 dbgs() << "*** Final schedule for "

1510 dbgs() << '\n';

1511 });

1512}

1513

1514

1520 return;

1521 }

1522

1523

1526

1527

1530

1531

1533

1534

1536}

1537

1546}

1547

1548

1549

1550

1551

1552

1553

1554

1555

1556

1557

1558

1559

1560

1561

1562

1563

1564

1565

1566

1567

1568

1569

1570

1571

1572

1573

1575

1577 return 0;

1578

1579 unsigned MaxCyclicLatency = 0;

1580

1583 if (!Reg.isVirtual())

1584 continue;

1587 if (!DefVNI)

1588 continue;

1589

1592 if (!DefSU)

1593 continue;

1594

1595 unsigned LiveOutHeight = DefSU->getHeight();

1596 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;

1597

1600 SUnit *SU = V2SU.SU;

1602 continue;

1603

1604

1607 continue;

1608

1609

1610

1611

1612 unsigned CyclicLatency = 0;

1613 if (LiveOutDepth > SU->getDepth())

1614 CyclicLatency = LiveOutDepth - SU->getDepth();

1615

1617 if (LiveInHeight > LiveOutHeight) {

1618 if (LiveInHeight - LiveOutHeight < CyclicLatency)

1619 CyclicLatency = LiveInHeight - LiveOutHeight;

1620 } else

1621 CyclicLatency = 0;

1622

1624 << SU->NodeNum << ") = " << CyclicLatency << "c\n");

1625 if (CyclicLatency > MaxCyclicLatency)

1626 MaxCyclicLatency = CyclicLatency;

1627 }

1628 }

1629 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");

1630 return MaxCyclicLatency;

1631}

1632

1633

1634

1641 }

1642}

1643

1644

1646

1648

1649 if (IsTopNode) {

1650 assert(SU->isTopReady() && "node still has unscheduled dependencies");

1653 else {

1656 }

1657

1659

1662 false);

1664

1667 } else {

1668

1670 }

1671

1676

1678 }

1679 } else {

1680 assert(SU->isBottomReady() && "node still has unscheduled dependencies");

1683 if (&*priorII == MI)

1685 else {

1689 }

1693 }

1697 false);

1699

1702 } else {

1703

1705 }

1706

1714

1717 }

1718 }

1719}

1720

1721

1722

1723

1724

1725namespace {

1726

1727

1728

1730 struct MemOpInfo {

1735 bool OffsetIsScalable;

1736

1739 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),

1740 OffsetIsScalable(OffsetIsScalable) {}

1741

1744 if (A->getType() != B->getType())

1745 return A->getType() < B->getType();

1746 if (A->isReg())

1747 return A->getReg() < B->getReg();

1748 if (A->isFI()) {

1749 const MachineFunction &MF = *A->getParent()->getParent()->getParent();

1753 return StackGrowsDown ? A->getIndex() > B->getIndex()

1754 : A->getIndex() < B->getIndex();

1755 }

1756

1757 llvm_unreachable("MemOpClusterMutation only supports register or frame "

1758 "index bases.");

1759 }

1760

1761 bool operator<(const MemOpInfo &RHS) const {

1762

1763

1764 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),

1765 RHS.BaseOps.begin(), RHS.BaseOps.end(),

1766 Compare))

1767 return true;

1768 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),

1769 BaseOps.begin(), BaseOps.end(), Compare))

1770 return false;

1773 return SU->NodeNum < RHS.SU->NodeNum;

1774 }

1775 };

1776

1779 bool IsLoad;

1780 bool ReorderWhileClustering;

1781

1782public:

1785 bool ReorderWhileClustering)

1786 : TII(tii), TRI(tri), IsLoad(IsLoad),

1787 ReorderWhileClustering(ReorderWhileClustering) {}

1788

1790

1791protected:

1792 void clusterNeighboringMemOps(ArrayRef MemOps, bool FastCluster,

1794 void collectMemOpRecords(std::vector &SUnits,

1798};

1799

1800class StoreClusterMutation : public BaseMemOpClusterMutation {

1801public:

1804 bool ReorderWhileClustering)

1805 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}

1806};

1807

1808class LoadClusterMutation : public BaseMemOpClusterMutation {

1809public:

1811 bool ReorderWhileClustering)

1812 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}

1813};

1814

1815}

1816

1817namespace llvm {

1818

1819std::unique_ptr

1822 bool ReorderWhileClustering) {

1824 TII, TRI, ReorderWhileClustering)

1825 : nullptr;

1826}

1827

1828std::unique_ptr

1831 bool ReorderWhileClustering) {

1833 TII, TRI, ReorderWhileClustering)

1834 : nullptr;

1835}

1836

1837}

1838

1839

1840

1841

1842

1843

1844void BaseMemOpClusterMutation::clusterNeighboringMemOps(

1847

1849

1850

1851

1852 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {

1853

1854 auto MemOpa = MemOpRecords[Idx];

1855

1856

1857 unsigned NextIdx = Idx + 1;

1858 for (; NextIdx < End; ++NextIdx)

1859

1860

1861 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&

1862 (FastCluster ||

1863 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&

1864 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))

1865 break;

1866 if (NextIdx == End)

1867 continue;

1868

1869 auto MemOpb = MemOpRecords[NextIdx];

1870 unsigned ClusterLength = 2;

1871 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +

1872 MemOpb.Width.getValue().getKnownMinValue();

1873 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {

1874 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;

1875 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +

1876 MemOpb.Width.getValue().getKnownMinValue();

1877 }

1878

 1879 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,

1880 MemOpa.OffsetIsScalable, MemOpb.BaseOps,

1881 MemOpb.Offset, MemOpb.OffsetIsScalable,

1882 ClusterLength, CurrentClusterBytes))

1883 continue;

1884

1885 SUnit *SUa = MemOpa.SU;

1886 SUnit *SUb = MemOpb.SU;

1887 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)

1889

1890

1892 continue;

1893

1895 << SUb->NodeNum << ")\n");

1896 ++NumClustered;

1897

1898 if (IsLoad) {

1899

1900

1901

1902

1903 for (const SDep &Succ : SUa->Succs) {

1905 continue;

1907 << ")\n");

1909 }

1910 } else {

1911

1912

1913

1914

1915

1916

1917 for (const SDep &Pred : SUb->Preds) {

1919 continue;

1921 << ")\n");

1923 }

1924 }

1925

1926 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,

1927 CurrentClusterBytes};

1928

1929 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength

1930 << ", Curr cluster bytes: " << CurrentClusterBytes

1931 << "\n");

1932 }

1933}
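// Hedged example of how a cluster grows (values assumed for illustration):
// for loads (base, offset 0, 8 bytes) and (base, offset 8, 8 bytes), the first
// pair starts with ClusterLength = 2 and CurrentClusterBytes = 16; a third
// load (base, offset 16, 8 bytes) then sees ClusterLength = 3 and 24 bytes via
// the SUnit2ClusterInfo entry recorded for its predecessor, and the target's
// shouldClusterMemOps hook decides whether the chain may keep growing.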

1934

1935void BaseMemOpClusterMutation::collectMemOpRecords(

1937 for (auto &SU : SUnits) {

1940 continue;

1941

1945 bool OffsetIsScalable;

1948 OffsetIsScalable, Width, TRI)) {

1950 continue;

1951

1953 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));

1954

1955 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "

1956 << Offset << ", OffsetIsScalable: " << OffsetIsScalable

1957 << ", Width: " << Width << "\n");

1958 }

1959#ifndef NDEBUG

1960 for (const auto *Op : BaseOps)

1962#endif

1963 }

1964}

1965

1966bool BaseMemOpClusterMutation::groupMemOps(

1969 bool FastCluster =

1972

1973 for (const auto &MemOp : MemOps) {

1974 unsigned ChainPredID = DAG->SUnits.size();

1975 if (FastCluster) {

1976 for (const SDep &Pred : MemOp.SU->Preds) {

1977

1978

1979

1980 if ((Pred.isCtrl() &&

1981 (IsLoad ||

1985 break;

1986 }

1987 }

1988 } else

1989 ChainPredID = 0;

1990

1992 }

1993 return FastCluster;

1994}

1995

1996

1998

2000 collectMemOpRecords(DAG->SUnits, MemOpRecords);

2001

2002 if (MemOpRecords.size() < 2)

2003 return;

2004

2005

2006

2007

2009 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);

2010

2011 for (auto &Group : Groups) {

2012

2013

2015

2016

2017 clusterNeighboringMemOps(Group.second, FastCluster, DAG);

2018 }

2019}

2020

2021

2022

2023

2024

2025namespace {

2026

2027

2028

2029

2031

2033

2034

2035

2037

2038public:

2040

2042

2043protected:

2045};

2046

2047}

2048

2049namespace llvm {

2050

2051std::unique_ptr

2054 return std::make_unique(TII, TRI);

2055}

2056

2057}

2058

2059

2060

2061

2062

2063

2064

2065

2066

2067

2068

2069

2070

2071

2072

2073

2074

2075

2076

2077

2081

2082

2086 return;

2087

2091 return;

2092

2093

2094

2095

2096

2097

2098

2099 unsigned LocalReg = SrcReg;

2100 unsigned GlobalReg = DstReg;

2102 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {

2103 LocalReg = DstReg;

2104 GlobalReg = SrcReg;

2106 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))

2107 return;

2108 }

2110

2111

2113

2114

2115

2116

2117 if (GlobalSegment == GlobalLI->end())

2118 return;

2119

2120

2121

2122

2123

2124 if (GlobalSegment->contains(LocalLI->beginIndex()))

2125 ++GlobalSegment;

2126

2127 if (GlobalSegment == GlobalLI->end())

2128 return;

2129

2130

2131 if (GlobalSegment != GlobalLI->begin()) {

2132

2134 GlobalSegment->start)) {

2135 return;

2136 }

2137

2138

2141 return;

2142 }

2143

2144

2145 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&

2146 "Disconnected LRG within the scheduling region.");

2147 }

2149 if (!GlobalDef)

2150 return;

2151

2153 if (!GlobalSU)

2154 return;

2155

2156

2157

2161 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);

2162 for (const SDep &Succ : LastLocalSU->Succs) {

2164 continue;

2165 if (Succ.getSUnit() == GlobalSU)

2166 continue;

2168 return;

2170 }

2171

2172

2176 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);

2177 for (const SDep &Pred : GlobalSU->Preds) {

2179 continue;

2180 if (Pred.getSUnit() == FirstLocalSU)

2181 continue;

2183 return;

2185 }

2187

2188 for (SUnit *LU : LocalUses) {

2189 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("

2190 << GlobalSU->NodeNum << ")\n");

2192 }

2193 for (SUnit *GU : GlobalUses) {

2194 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("

2195 << FirstLocalSU->NodeNum << ")\n");

2197 }

2198}

2199

2200

2201

2205

2207 if (FirstPos == DAG->end())

2208 return;

2212

2215 continue;

2216

2218 }

2219}

2220

2221

2222

2223

2224

2225

2227

2229

2230

2231

2232

2233

2235 unsigned Latency, bool AfterSchedNode) {

2236 int ResCntFactor = (int)(Count - (Latency * LFactor));

2237 if (AfterSchedNode)

2238 return ResCntFactor >= (int)LFactor;

2239 else

2240 return ResCntFactor > (int)LFactor;

2241}
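// Worked example for checkResourceLimit (illustrative numbers): with
// LFactor = 2, Count = 10 and Latency = 4, ResCntFactor = 10 - 4*2 = 2.
// After scheduling a node (AfterSchedNode = true) the zone counts as
// resource limited, since 2 >= LFactor; before scheduling it would not,
// since 2 > 2 is false.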

2242

2244

2245

2246

2250 }

2253 CheckPending = false;

2254 CurrCycle = 0;

2255 CurrMOps = 0;

 2256 MinReadyCycle = std::numeric_limits<unsigned>::max();

2257 ExpectedLatency = 0;

2258 DependentLatency = 0;

2259 RetiredMOps = 0;

2260 MaxExecutedResCount = 0;

2261 ZoneCritResIdx = 0;

2262 IsResourceLimited = false;

2263 ReservedCycles.clear();

2264 ReservedResourceSegments.clear();

2265 ReservedCyclesIndex.clear();

2266 ResourceGroupSubUnitMasks.clear();

2267#if LLVM_ENABLE_ABI_BREAKING_CHECKS

2268

2269

2270

2271 MaxObservedStall = 0;

2272#endif

2273

2274 ExecutedResCounts.resize(1);

2275 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");

2276}

2277

2282 return;

2291 unsigned PIdx = PI->ProcResourceIdx;

2293 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);

2295 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));

2296 }

2297 }

2298}

2299

2303 DAG = dag;

2308 ReservedCyclesIndex.resize(ResourceCount);

2309 ExecutedResCounts.resize(ResourceCount);

2310 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));

2311 unsigned NumUnits = 0;

2312

2313 for (unsigned i = 0; i < ResourceCount; ++i) {

2314 ReservedCyclesIndex[i] = NumUnits;

2319 U != UE; ++U)

2320 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);

2321 }

2322 }

2323

2324 ReservedCycles.resize(NumUnits, InvalidCycle);

2325 }

2326}

2327

2328

2329

2330

2331

2332

2333

2334

2337 return 0;

2338

2340 if (ReadyCycle > CurrCycle)

2341 return ReadyCycle - CurrCycle;

2342 return 0;

2343}

2344

2345

2346

2348 unsigned ReleaseAtCycle,

2349 unsigned AcquireAtCycle) {

2352 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(

2353 CurrCycle, AcquireAtCycle, ReleaseAtCycle);

2354

2355 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(

2356 CurrCycle, AcquireAtCycle, ReleaseAtCycle);

2357 }

2358

2359 unsigned NextUnreserved = ReservedCycles[InstanceIdx];

2360

2362 return CurrCycle;

2363

2365 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);

2366 return NextUnreserved;

2367}

2368

2369

2370

2371

2372std::pair<unsigned, unsigned>

2374 unsigned ReleaseAtCycle,

2375 unsigned AcquireAtCycle) {

2377 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");

2379 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");

2380 }

2382 unsigned InstanceIdx = 0;

2383 unsigned StartIndex = ReservedCyclesIndex[PIdx];

2385 assert(NumberOfInstances > 0 &&

2386 "Cannot have zero instances of a ProcResource");

2387

2389

2390

2391

2392

2393

2394

2395

2396

2397

2398

2399

2403 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])

2405 StartIndex, ReleaseAtCycle, AcquireAtCycle),

2406 StartIndex);

2407

2409 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {

2410 unsigned NextUnreserved, NextInstanceIdx;

2411 std::tie(NextUnreserved, NextInstanceIdx) =

2413 if (MinNextUnreserved > NextUnreserved) {

2414 InstanceIdx = NextInstanceIdx;

2415 MinNextUnreserved = NextUnreserved;

2416 }

2417 }

2418 return std::make_pair(MinNextUnreserved, InstanceIdx);

2419 }

2420

2421 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;

2422 ++I) {

2423 unsigned NextUnreserved =

2426 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"

2427 << NextUnreserved << "c\n");

2428 if (MinNextUnreserved > NextUnreserved) {

2429 InstanceIdx = I;

2430 MinNextUnreserved = NextUnreserved;

2431 }

2432 }

2435 << "[" << InstanceIdx - StartIndex << "]"

2436 << " available @" << MinNextUnreserved << "c"

2437 << "\n");

2438 return std::make_pair(MinNextUnreserved, InstanceIdx);

2439}
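// Example of the (cycle, instance) pair returned above, with assumed
// bookings: if a ProcResource has two units whose earliest-free cycles are
// {5, 3} at CurrCycle = 2, the scan reports (3, instance 1), i.e. the
// earliest instance that can accept the new usage; an instance that was
// never reserved (InvalidCycle) is available at CurrCycle itself.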

2440

2441

2442

2443

2444

2445

2446

2447

2448

2449

2450

2451

2452

2453

2457 return true;

2458 }

2459

2464 return true;

2465 }

2466

2467 if (CurrMOps > 0 &&

2471 << (isTop() ? "begin" : "end") << " group\n");

2472 return true;

2473 }

2474

2480 unsigned ResIdx = PE.ProcResourceIdx;

2481 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;

2482 unsigned AcquireAtCycle = PE.AcquireAtCycle;

2483 unsigned NRCycle, InstanceIdx;

2484 std::tie(NRCycle, InstanceIdx) =

2486 if (NRCycle > CurrCycle) {

2487#if LLVM_ENABLE_ABI_BREAKING_CHECKS

2488 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);

2489#endif

2492 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'

2493 << "=" << NRCycle << "c\n");

2494 return true;

2495 }

2496 }

2497 }

2498 return false;

2499}

2500

2501

2504 SUnit *LateSU = nullptr;

2505 unsigned RemLatency = 0;

2506 for (SUnit *SU : ReadySUs) {

2508 if (L > RemLatency) {

2509 RemLatency = L;

2510 LateSU = SU;

2511 }

2512 }

2513 if (LateSU) {

2515 << LateSU->NodeNum << ") " << RemLatency << "c\n");

2516 }

2517 return RemLatency;

2518}

2519

2520

2521

2522

2525 OtherCritIdx = 0;

2527 return 0;

2528

2534 PIdx != PEnd; ++PIdx) {

2536 if (OtherCount > OtherCritCount) {

2537 OtherCritCount = OtherCount;

2538 OtherCritIdx = PIdx;

2539 }

2540 }

2541 if (OtherCritIdx) {

2546 }

2547 return OtherCritCount;

2548}

2549

2551 unsigned Idx) {

2552 assert(SU->getInstr() && "Scheduled SUnit must have instr");

2553

2554#if LLVM_ENABLE_ABI_BREAKING_CHECKS

2555

2556

2557

2558 if (ReadyCycle > CurrCycle)

2559 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);

2560#endif

2561

2562 if (ReadyCycle < MinReadyCycle)

2563 MinReadyCycle = ReadyCycle;

2564

2565

2566

2568 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||

2570

2571 if (!HazardDetected) {

2573

2574 if (InPQueue)

2576 return;

2577 }

2578

2579 if (!InPQueue)

2581}

2582

2583

 2586 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&

 2587 "MinReadyCycle uninitialized");

2588 if (MinReadyCycle > NextCycle)

2589 NextCycle = MinReadyCycle;

2590 }

2591

2593 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;

2594

2595

2596 if ((NextCycle - CurrCycle) > DependentLatency)

2597 DependentLatency = 0;

2598 else

2599 DependentLatency -= (NextCycle - CurrCycle);

2600

2602

2603 CurrCycle = NextCycle;

2604 } else {

2605

2606 for (; CurrCycle != NextCycle; ++CurrCycle) {

2609 else

2611 }

2612 }

2613 CheckPending = true;

2614 IsResourceLimited =

2617

2619 << '\n');

2620}
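// Illustrative trace (assuming an in-order model, MicroOpBufferSize == 0,
// with an enabled hazard recognizer): advancing from CurrCycle = 4 to
// NextCycle = 7 calls AdvanceCycle() (top zone) or RecedeCycle() (bottom
// zone) once per skipped cycle, whereas a buffered out-of-order model simply
// jumps CurrCycle forward after trimming DependentLatency by the number of
// cycles advanced.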

2621

2623 ExecutedResCounts[PIdx] += Count;

2624 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)

2625 MaxExecutedResCount = ExecutedResCounts[PIdx];

2626}

2627

2628

2629

2630

2631

2632

2633

2634

2635

2636

2637

2639 unsigned ReleaseAtCycle,

2640 unsigned NextCycle,

2641 unsigned AcquireAtCycle) {

 2643 unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);

2645 << ReleaseAtCycle << "x" << Factor << "u\n");

2646

2647

2651

2652

2653

2655 ZoneCritResIdx = PIdx;

2659 << "c\n");

2660 }

2661

2662 unsigned NextAvailable, InstanceIdx;

2663 std::tie(NextAvailable, InstanceIdx) =

2665 if (NextAvailable > CurrCycle) {

2668 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'

2669 << " reserved until @" << NextAvailable << "\n");

2670 }

2671 return NextAvailable;

2672}

2673

2674

2676

2679

2680

2682 }

2684

2685 CheckPending = true;

2686 }

2687

2688

2693 "Cannot schedule this instruction's MicroOps in the current cycle.");

2694

2696 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");

2697

2698 unsigned NextCycle = CurrCycle;

2700 case 0:

2701 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");

2702 break;

2703 case 1:

2704 if (ReadyCycle > NextCycle) {

2705 NextCycle = ReadyCycle;

2706 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");

2707 }

2708 break;

2709 default:

2710

2711

2712

2713

2714 if (SU->isUnbuffered && ReadyCycle > NextCycle)

2715 NextCycle = ReadyCycle;

2716 break;

2717 }

2718 RetiredMOps += IncMOps;

2719

2720

2725 if (ZoneCritResIdx) {

2726

2727 unsigned ScaledMOps =

2729

2730

2731

2734 ZoneCritResIdx = 0;

2735 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "

2737 << "c\n");

2738 }

2739 }

2743 unsigned RCycle =

2744 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,

2745 PI->AcquireAtCycle);

2746 if (RCycle > NextCycle)

2747 NextCycle = RCycle;

2748 }

2750

2751

2752

2753

2757 unsigned PIdx = PI->ProcResourceIdx;

2759

2761 unsigned ReservedUntil, InstanceIdx;

2763 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);

2765 ReservedResourceSegments[InstanceIdx].add(

2767 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),

2769 } else {

2770 ReservedResourceSegments[InstanceIdx].add(

2772 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),

2774 }

2775 } else {

2776

2777 unsigned ReservedUntil, InstanceIdx;

2779 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);

2781 ReservedCycles[InstanceIdx] =

2782 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);

2783 } else

2784 ReservedCycles[InstanceIdx] = NextCycle;

2785 }

2786 }

2787 }

2788 }

2789 }

2790

2791 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;

2792 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;

2793 if (SU->getDepth() > TopLatency) {

2794 TopLatency = SU->getDepth();

2796 << SU->NodeNum << ") " << TopLatency << "c\n");

2797 }

2798 if (SU->getHeight() > BotLatency) {

2801 << SU->NodeNum << ") " << BotLatency << "c\n");

2802 }

2803

2804 if (NextCycle > CurrCycle)

2806 else

2807

2808

2809 IsResourceLimited =

2812

2813

2814

2815

2816

2817 CurrMOps += IncMOps;

2818

2819

2820

2821

2822

2826 << " group\n");

2828 }

2829

2831 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "

2832 << CurrCycle << '\n');

2834 }

2836}

2837

2838

2839

2841

 2843 MinReadyCycle = std::numeric_limits<unsigned>::max();

2844

2845

2846

2847 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {

2850

2851 if (ReadyCycle < MinReadyCycle)

2852 MinReadyCycle = ReadyCycle;

2853

2855 break;

2856

2859 --I;

2860 --E;

2861 }

2862 }

2863 CheckPending = false;

2864}

2865

2866

2870 else {

2873 }

2874}

2875

2876

2877

2878

2880 if (CheckPending)

2882

2883

2888 continue;

2889 }

2890 ++I;

2891 }

2893

2894

2895

2896 (void)i;

2899 }

2900

2903

2906 return nullptr;

2907}

2908

2909#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

2910

2911

2912

2913

2916 return;

2917

2919 unsigned StartIdx = 0;

2920

2921 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {

2924 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {

2925 dbgs() << ResName << "(" << UnitIdx << ") = ";

2927 if (ReservedResourceSegments.count(StartIdx + UnitIdx))

2928 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);

2929 else

2930 dbgs() << "{ }\n";

2931 } else

2932 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";

2933 }

2934 StartIdx += NumUnits;

2935 }

2936}

2937

2938

2939

2941 unsigned ResFactor;

2942 unsigned ResCount;

2943 if (ZoneCritResIdx) {

2946 } else {

2948 ResCount = RetiredMOps * ResFactor;

2949 }

2952 << " Retired: " << RetiredMOps;

2954 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "

2955 << ResCount / ResFactor << " "

2957 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"

2958 << (IsResourceLimited ? " - Resource" : " - Latency")

2959 << " limited.\n";

2962}

2963#endif

2964

2965

2966

2967

2968

2973 return;

2974

2983 }

2984}

2985

2986

2987

2988

2989

2990

2991

2992

2993

2994

2995

2996

2997

2998

2999

3000

3001

3004 RemLatency = std::max(RemLatency,

3006 RemLatency = std::max(RemLatency,

3008 return RemLatency;

3009}

3010

3011

3012

3013bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,

3015 bool ComputeRemLatency,

3016 unsigned &RemLatency) const {

3017

3018

3020 return true;

3021

3022

3024 return false;

3025

3026 if (ComputeRemLatency)

3028

3030}

3031

3032

3033

3037

3038

3039

3040

3041

3042 unsigned OtherCritIdx = 0;

3043 unsigned OtherCount =

3045

3046 bool OtherResLimited = false;

3047 unsigned RemLatency = 0;

3048 bool RemLatencyComputed = false;

3051 RemLatencyComputed = true;

3053 OtherCount, RemLatency, false);

3054 }

3055

3056

3057

3058

3059 if (!OtherResLimited &&

3060 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,

3061 RemLatency))) {

3064 << " RemainingLatency " << RemLatency << " + "

3065 << CurrZone.getCurrCycle() << "c > CritPath "

3067 }

3068

3070 return;

3071

3073 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "

3074 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";

3075 } if (OtherResLimited) dbgs()

3076 << " RemainingLimit: "

3079 << " Latency limited both directions.\n");

3080

3083

3084 if (OtherResLimited)

3086}

3087

3088#ifndef NDEBUG

3091 switch (Reason) {

3092 case NoCand: return "NOCAND ";

3093 case Only1: return "ONLY1 ";

3094 case PhysReg: return "PHYS-REG ";

3095 case RegExcess: return "REG-EXCESS";

3097 case Stall: return "STALL ";

3098 case Cluster: return "CLUSTER ";

3099 case Weak: return "WEAK ";

3100 case RegMax: return "REG-MAX ";

3108 case NodeOrder: return "ORDER ";

3109 };

3111}

3112

3115 unsigned ResIdx = 0;

3117 switch (Cand.Reason) {

3118 default:

3119 break;

3122 break;

3125 break;

3128 break;

3131 break;

3134 break;

3137 break;

3140 break;

3143 break;

3146 break;

3147 }

3149 if (P.isValid())

3151 << ":" << P.getUnitInc() << " ";

3152 else

3153 dbgs() << " ";

3154 if (ResIdx)

3156 else

3157 dbgs() << " ";

3160 else

3161 dbgs() << " ";

3162 dbgs() << '\n';

3163}

3164#endif

3165

3166namespace llvm {

3167

3168

3169

3174 if (TryVal < CandVal) {

3175 TryCand.Reason = Reason;

3176 return true;

3177 }

3178 if (TryVal > CandVal) {

3179 if (Cand.Reason > Reason)

3180 Cand.Reason = Reason;

3181 return true;

3182 }

3183 return false;

3184}

3185

3190 if (TryVal > CandVal) {

3191 TryCand.Reason = Reason;

3192 return true;

3193 }

3194 if (TryVal < CandVal) {

3195 if (Cand.Reason > Reason)

3196 Cand.Reason = Reason;

3197 return true;

3198 }

3199 return false;

3200}
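// tryLess/tryGreater implement a three-way comparison idiom. A hedged usage
// sketch (the pattern mirrors the tryCandidate callers below; A/B fields are
// hypothetical):
//
//   if (tryLess(TryA, CandA, TryCand, Cand, Stall))
//     return TryCand.Reason != NoCand;  // decided either way: stop here
//   // returned false: the values tied, so fall through to the next
//   // heuristic in priority order.
//
// When the existing candidate wins, its Reason is tightened to this
// heuristic's priority, so later, weaker heuristics cannot override an
// earlier, stronger decision.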

3201

3205 if (Zone.isTop()) {

3206

3207

3208

3213 return true;

3214 }

3217 return true;

3218 } else {

3219

3220

3221

3226 return true;

3227 }

3230 return true;

3231 }

3232 return false;

3233}

3234}

3235

3237 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")

3239}

3240

3243}

3244

3247 "(PreRA)GenericScheduler needs vreg liveness");

3251

3253 DAG->computeDFSResult();

3254

3258

3259

3260

3261

3262

3264 if (!Top.HazardRec) {

3266 }

3267 if (!Bot.HazardRec) {

3269 }

3270 TopCand.SU = nullptr;

3271 BotCand.SU = nullptr;

3272}

3273

3274

3277 unsigned NumRegionInstrs) {

3280

3281

3282

3283

3284

3286 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {

3292 break;

3293 }

3294 }

3295

3296

3297

3299

3300

3302

3303

3307 }

3308

3318 }

3319}

3320

3322

3323#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

3324 dbgs() << "GenericScheduler RegionPolicy: "

3328 << "\n";

3329#endif

3330}

3331

3332

3333

3334

3335

3336

3337

3338

3339

3340

3343 return;

3344

3345

3346 unsigned IterCount =

3349

3351

3352 unsigned InFlightCount =

3353 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;

3354 unsigned BufferLimit =

3356

3358

3360 dbgs() << "IssueCycles="

3363 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount

3367}
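// Hedged arithmetic example for checkAcyclicLatency (numbers assumed): if
// one loop iteration needs IterCount = 20 cycles of issue and the remaining
// issue count implies InFlightCount = 30 in-flight micro-ops against a
// BufferLimit of 24, then InFlightCount > BufferLimit marks the region
// acyclic-latency limited: hiding latency across iterations would need more
// buffering than the machine provides.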

3368

3371

3372

3373 for (const SUnit *SU : Bot.Available) {

3376 }

3380 }

3381

3384 checkAcyclicLatency();

3385 }

3386}

3387

3388namespace llvm {

3396

3397

3399 Reason)) {

3400 return true;

3401 }

3402

3403

3405 return false;

3406

3407

3408

3411 if (TryPSet == CandPSet) {

3413 Reason);

3414 }

3415

 3416 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :

 3417 std::numeric_limits<int>::max();

 3418

 3419 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :

 3420 std::numeric_limits<int>::max();

3421

3422

3425 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);

3426}

3427

3430}

3431

3432

3433

3434

3435

3436

3437

3438

3441

3442 if (MI->isCopy()) {

3443 unsigned ScheduledOper = isTop ? 1 : 0;

3444 unsigned UnscheduledOper = isTop ? 0 : 1;

3445

3446

3447 if (MI->getOperand(ScheduledOper).getReg().isPhysical())

3448 return 1;

3449

3450

3452 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())

3453 return AtBoundary ? -1 : 1;

3454 }

3455

3456 if (MI->isMoveImmediate()) {

3457

3458

3459

3460 bool DoBias = true;

3462 if (Op.isReg() && Op.getReg().isPhysical()) {

3463 DoBias = false;

3464 break;

3465 }

3466 }

3467

3468 if (DoBias)

3469 return isTop ? -1 : 1;

3470 }

3471

3472 return 0;

3473}

3474}
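// Illustrative cases for biasPhysReg above (register names assumed): a copy
// like %v = COPY $x0 near the top boundary is pulled toward the boundary so
// the physical-register operand stays close to the block entry, while
// $x0 = COPY %v prefers the opposite treatment at the bottom; either way the
// bias shortens the live range that pins the physical register.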

3475

3477 bool AtTop,

3480 Cand.SU = SU;

3481 Cand.AtTop = AtTop;

3482 if (DAG->isTrackingPressure()) {

3483 if (AtTop) {

3487 DAG->getRegionCriticalPSets(),

3488 DAG->getRegPressure().MaxSetPressure);

3489 } else {

3493 &DAG->getPressureDiff(Cand.SU),

3495 DAG->getRegionCriticalPSets(),

3496 DAG->getRegPressure().MaxSetPressure);

3497 } else {

3500 DAG->getPressureDiff(Cand.SU),

3502 DAG->getRegionCriticalPSets(),

3503 DAG->getRegPressure().MaxSetPressure);

3504 }

3505 }

3506 }

3508 << " Try SU(" << Cand.SU->NodeNum << ") "

3511}

3512

3513

3514

3515

3516

3517

3518

3519

3520

3521

3522

3523

3527

3530 return true;

3531 }

3532

3533

3537

3538

3542 DAG->MF))

3544

3545

3549 DAG->MF))

3551

3552

3553

3554

3555

3556

3557 bool SameBoundary = Zone != nullptr;

3558 if (SameBoundary) {

3559

3560

3561

3565

3566

3570 }

3571

3572

3573

3574

3575

3576

3577

3578 const SUnit *CandNextClusterSU =

3580 const SUnit *TryCandNextClusterSU =

3582 if (tryGreater(TryCand.SU == TryCandNextClusterSU,

3583 Cand.SU == CandNextClusterSU,

3584 TryCand, Cand, Cluster))

3586

3587 if (SameBoundary) {

3588

3591 TryCand, Cand, Weak))

3593 }

3594

3595

3599 DAG->MF))

3601

3602 if (SameBoundary) {

3603

3612

3613

3614

3618

3619

3623 return true;

3624 }

3625 }

3626

3627 return false;

3628}

3629

3630

3631

3632

3633

3634

3639

3641

3643 for (SUnit *SU : Q) {

3644

3646 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);

3647

3649 if (tryCandidate(Cand, TryCand, ZoneArg)) {

3650

3655 }

3656 }

3657}

3658

3659

3661

3662

3663 if (SUnit *SU = Bot.pickOnlyChoice()) {

3664 IsTopNode = false;

3666 return SU;

3667 }

3668 if (SUnit *SU = Top.pickOnlyChoice()) {

3669 IsTopNode = true;

3671 return SU;

3672 }

3673

3674

3676 setPolicy(BotPolicy, false, Bot, &Top);

3677

3678

3680 setPolicy(TopPolicy, false, Top, &Bot);

3681

3682

3684 if (!BotCand.isValid() || BotCand.SU->isScheduled ||

3685 BotCand.Policy != BotPolicy) {

3687 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);

3688 assert(BotCand.Reason != NoCand && "failed to find the first candidate");

3689 } else {

3691#ifndef NDEBUG

3695 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);

3696 assert(TCand.SU == BotCand.SU &&

3697 "Last pick result should correspond to re-picking right now");

3698 }

3699#endif

3700 }

3701

3702

3704 if (!TopCand.isValid() || TopCand.SU->isScheduled ||

3705 TopCand.Policy != TopPolicy) {

3707 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);

3708 assert(TopCand.Reason != NoCand && "failed to find the first candidate");

3709 } else {

3711#ifndef NDEBUG

3715 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);

3716 assert(TCand.SU == TopCand.SU &&

3717 "Last pick result should correspond to re-picking right now");

3718 }

3719#endif

3720 }

3721

3722

3723 assert(BotCand.isValid());

3724 assert(TopCand.isValid());

3727 if (tryCandidate(Cand, TopCand, nullptr)) {

3730 }

3731

3732 IsTopNode = Cand.AtTop;

3734 return Cand.SU;

3735}

3736

3737

3739 if (DAG->top() == DAG->bottom()) {

3740 assert(Top.Available.empty() && Top.Pending.empty() &&

3741 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

3742 return nullptr;

3743 }

3745 do {

3747 SU = Top.pickOnlyChoice();

3748 if (!SU) {

3750 TopCand.reset(NoPolicy);

3751 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);

3752 assert(TopCand.Reason != NoCand && "failed to find a candidate");

3754 SU = TopCand.SU;

3755 }

3756 IsTopNode = true;

3758 SU = Bot.pickOnlyChoice();

3759 if (!SU) {

3761 BotCand.reset(NoPolicy);

3762 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);

3763 assert(BotCand.Reason != NoCand && "failed to find a candidate");

3765 SU = BotCand.SU;

3766 }

3767 IsTopNode = false;

3768 } else {

3769 SU = pickNodeBidirectional(IsTopNode);

3770 }

3772

3773

3774

3775

3776

3777

3778

3779

3780

3781

3782

3783

3784

3785

3786

3787

3789 Top.removeReady(SU);

3791 Bot.removeReady(SU);

3792

3795 return SU;

3796}

3797

3800 if (!isTop)

3801 ++InsertPos;

3803

3804

3805

3806 for (SDep &Dep : Deps) {

3809 continue;

3810 SUnit *DepSU = Dep.getSUnit();

3811 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)

3812 continue;

3814 if (!Copy->isCopy() && !Copy->isMoveImmediate())

3815 continue;

3817 DAG->dumpNode(*Dep.getSUnit()));

3819 }

3820}

3821

3822

3823

3824

3825

3826

3827

3828

3830 if (IsTopNode) {

3832 Top.bumpNode(SU);

3834 reschedulePhysReg(SU, true);

3835 } else {

3837 Bot.bumpNode(SU);

3839 reschedulePhysReg(SU, false);

3840 }

3841}

3842

3843

3844

3848

3849

3850

3851

3852

3854

3856

3858 if (!MacroFusions.empty())

3860 return DAG;

3861}

3862

3865}

3866

3870

3871

3872

3873

3874

3876 DAG = Dag;

3879

3883

3884

3885

3887 if (!Top.HazardRec) {

3889 }

3890 if (!Bot.HazardRec) {

3892 }

3893}

3894

3897 unsigned NumRegionInstrs) {

3899

3900

3901

3904

3905

3907

3908

3918 }

3919}

3920

3923

3924

3925 for (const SUnit *SU : Bot.Available) {

3928 }

3932 }

3933}

3934

3935

3936

3937

3938

3939

3942

3945 return true;

3946 }

3947

3948

3949 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),

3950 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

3952

3953

3954 const SUnit *CandNextClusterSU =

3956 const SUnit *TryCandNextClusterSU =

3958 if (tryGreater(TryCand.SU == TryCandNextClusterSU,

3959 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))

3961

3962

3970

3971

3972

3974

3978 }

3979

3980

3983 return true;

3984 }

3985

3986 return false;

3987}

3988

3992 for (SUnit *SU : Q) {

3994 TryCand.SU = SU;

3997 if (tryCandidate(Cand, TryCand)) {

4000 }

4001 }

4002}

4003

4004

4006

4007

4008

4009

4010

4011 if (SUnit *SU = Bot.pickOnlyChoice()) {

4012 IsTopNode = false;

4014 return SU;

4015 }

4016 if (SUnit *SU = Top.pickOnlyChoice()) {

4017 IsTopNode = true;

4019 return SU;

4020 }

4021

4022

4024 setPolicy(BotPolicy, true, Bot, &Top);

4025

4026

4028 setPolicy(TopPolicy, true, Top, &Bot);

4029

4030

4032 if (!BotCand.isValid() || BotCand.SU->isScheduled ||

4033 BotCand.Policy != BotPolicy) {

4035 pickNodeFromQueue(Bot, BotCand);

4036 assert(BotCand.Reason != NoCand && "failed to find the first candidate");

4037 } else {

4039#ifndef NDEBUG

4043 pickNodeFromQueue(Bot, BotCand);

4044 assert(TCand.SU == BotCand.SU &&

4045 "Last pick result should correspond to re-picking right now");

4046 }

4047#endif

4048 }

4049

4050

4052 if (!TopCand.isValid() || TopCand.SU->isScheduled ||

4053 TopCand.Policy != TopPolicy) {

4055 pickNodeFromQueue(Top, TopCand);

4056 assert(TopCand.Reason != NoCand && "failed to find the first candidate");

4057 } else {

4059#ifndef NDEBUG

4063 pickNodeFromQueue(Top, TopCand);

4064 assert(TCand.SU == TopCand.SU &&

4065 "Last pick result should correspond to re-picking right now");

4066 }

4067#endif

4068 }

4069

4070

4071 assert(BotCand.isValid());

4072 assert(TopCand.isValid());

4075 if (tryCandidate(Cand, TopCand)) {

4078 }

4079

4080 IsTopNode = Cand.AtTop;

4082 return Cand.SU;

4083}

4084

4085

4087 if (DAG->top() == DAG->bottom()) {

4088 assert(Top.Available.empty() && Top.Pending.empty() &&

4089 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

4090 return nullptr;

4091 }

4093 do {

4095 SU = Bot.pickOnlyChoice();

4096 if (SU) {

4098 } else {

4100 BotCand.reset(NoPolicy);

4101

4102

4103 setPolicy(BotCand.Policy, true, Bot, nullptr);

4104 pickNodeFromQueue(Bot, BotCand);

4105 assert(BotCand.Reason != NoCand && "failed to find a candidate");

4107 SU = BotCand.SU;

4108 }

4109 IsTopNode = false;

4111 SU = Top.pickOnlyChoice();

4112 if (SU) {

4114 } else {

4116 TopCand.reset(NoPolicy);

4117

4118

4119 setPolicy(TopCand.Policy, true, Top, nullptr);

4120 pickNodeFromQueue(Top, TopCand);

4121 assert(TopCand.Reason != NoCand && "failed to find a candidate");

4123 SU = TopCand.SU;

4124 }

4125 IsTopNode = true;

4126 } else {

4127 SU = pickNodeBidirectional(IsTopNode);

4128 }

4130

4132 Top.removeReady(SU);

4134 Bot.removeReady(SU);

4135

4138 return SU;

4139}

4140

4141

4142

4144 if (IsTopNode) {

4146 Top.bumpNode(SU);

4147 } else {

4149 Bot.bumpNode(SU);

4150 }

4151}

4152

4155 new ScheduleDAGMI(C, std::make_unique(C),

4156 true);

4158

4160 if (!MacroFusions.empty())

4162 return DAG;

4163}

4164

4165

4166

4167

4168

4169namespace {

4170

4171

4172struct ILPOrder {

4174 const BitVector *ScheduledTrees = nullptr;

4175 bool MaximizeILP;

4176

4177 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}

4178

4179

4180

4181

4182 bool operator()(const SUnit *A, const SUnit *B) const {

4183 unsigned SchedTreeA = DFSResult->getSubtreeID(A);

4184 unsigned SchedTreeB = DFSResult->getSubtreeID(B);

4185 if (SchedTreeA != SchedTreeB) {

4186

4187 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))

4188 return ScheduledTrees->test(SchedTreeB);

4189

4190

4195 }

4196 }

4197 if (MaximizeILP)

4199 else

4201 }

4202};
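// Illustrative comparison for ILPOrder (DFS subtree IDs assumed): if A sits
// in an unscheduled subtree while B's subtree has already started
// scheduling, B is preferred so the in-flight subtree finishes first; only
// when both nodes tie on subtree state does the ILP metric itself break the
// tie via getILP(A) vs getILP(B), maximized or minimized per MaximizeILP.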

4203

4204

4207 ILPOrder Cmp;

4208

4209 std::vector<SUnit*> ReadyQ;

4210

4211public:

4212 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}

4213

4221 }

4222

4223 void registerRoots() override {

4224

4225 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

4226 }

4227

4228

4229

4230

4231

4232 SUnit *pickNode(bool &IsTopNode) override {

4233 if (ReadyQ.empty()) return nullptr;

4234 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

4235 SUnit *SU = ReadyQ.back();

4236 ReadyQ.pop_back();

4237 IsTopNode = false;

4239 << "SU(" << SU->NodeNum << ") "

4242 << " @"

4245 << '\n'

4246 << "Scheduling " << *SU->getInstr());

4247 return SU;

4248 }

4249

4250

4251 void scheduleTree(unsigned SubtreeID) override {

4252 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

4253 }

4254

4255

4256

4257 void schedNode(SUnit *SU, bool IsTopNode) override {

4258 assert(!IsTopNode && "SchedDFSResult needs bottom-up");

4259 }

4260

4261 void releaseTopNode(SUnit *) override { }

4262

4263 void releaseBottomNode(SUnit *SU) override {

4264 ReadyQ.push_back(SU);

4265 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

4266 }

4267};

4268

4269}

4270

4272 return new ScheduleDAGMILive(C, std::make_unique(true));

4273}

4275 return new ScheduleDAGMILive(C, std::make_unique(false));

4276}

4277

4282

4283

4284

4285

4286

4287#ifndef NDEBUG

4288namespace {

4289

4290

4291

4292template

4293struct SUnitOrder {

4295 if (IsReverse)

4296 return A->NodeNum > B->NodeNum;

4297 else

4298 return A->NodeNum < B->NodeNum;

4299 }

4300};

4301

4302

4304 bool IsAlternating;

4305 bool IsTopDown;

4306

4307

4308

4309

4311 TopQ;

4312

4313

4315 BottomQ;

4316

4317public:

4318 InstructionShuffler(bool alternate, bool topdown)

4319 : IsAlternating(alternate), IsTopDown(topdown) {}

4320

4323 BottomQ.clear();

4324 }

4325

4326

4327

4328

4329 SUnit *pickNode(bool &IsTopNode) override {

4331 if (IsTopDown) {

4332 do {

4333 if (TopQ.empty()) return nullptr;

4334 SU = TopQ.top();

4335 TopQ.pop();

4337 IsTopNode = true;

4338 } else {

4339 do {

4340 if (BottomQ.empty()) return nullptr;

4341 SU = BottomQ.top();

4342 BottomQ.pop();

4344 IsTopNode = false;

4345 }

4346 if (IsAlternating)

4347 IsTopDown = !IsTopDown;

4348 return SU;

4349 }

4350

4351 void schedNode(SUnit *SU, bool IsTopNode) override {}

4352

4353 void releaseTopNode(SUnit *SU) override {

4354 TopQ.push(SU);

4355 }

4356 void releaseBottomNode(SUnit *SU) override {

4357 BottomQ.push(SU);

4358 }

4359};

4360

4361}

4362

4364 bool Alternate =

4368 C, std::make_unique(Alternate, TopDown));

4369}

4370

4372 "shuffle", "Shuffle machine instructions alternating directions",

4374#endif

4375

4376

4377

4378

4379

4380#ifndef NDEBUG

4381namespace llvm {

4382

4385

4386template<>

4389

4391 return std::string(G->MF.getName());

4392 }

4393

4395 return true;

4396 }

4397

4400 return false;

4403 }

4404

4405

4406

4411 return "color=cyan,style=dashed";

4413 return "color=blue,style=dashed";

4414 return "";

4415 }

4416

4418 std::string Str;

4423 SS << "SU:" << SU->NodeNum;

4424 if (DFS)

4426 return Str;

4427 }

4428

4430 return G->getGraphNodeLabel(SU);

4431 }

4432

4434 std::string Str("shape=Mrecord");

4438 if (DFS) {

4439 Str += ",style=filled,fillcolor=\"#";

4441 Str += '"';

4442 }

4443 return Str;

4444 }

4445};

4446

4447}

4448#endif

4449

4450

4451

4453#ifndef NDEBUG

4455#else

4456 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "

4457 << "systems with Graphviz or gv!\n";

4458#endif

4459}

4460

4461

4463 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());

4464}

4465

4466

4467

4468

4469

4472 return A.first < B.first;

4473}

4474

4475unsigned ResourceSegments::getFirstAvailableAt(

4476 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,

4478 IntervalBuilder) const {

4479 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),

4481 "Cannot execute on an un-sorted set of intervals.");

4482

4483

4484

4485 if (AcquireAtCycle == ReleaseAtCycle)

4486 return CurrCycle;

4487

4488 unsigned RetCycle = CurrCycle;

4490 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);

4491 for (auto &Interval : _Intervals) {

4493 continue;

4494

4495

4496

4498 "Invalid intervals configuration.");

4499 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;

4500 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);

4501 }

4502 return RetCycle;

4503}
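// Worked example for getFirstAvailableAt (intervals assumed): with booked
// intervals {[2,4), [5,7)}, CurrCycle = 1, AcquireAtCycle = 0 and
// ReleaseAtCycle = 2, the candidate interval [1,3) overlaps [2,4), so
// RetCycle is pushed to 4; the rebuilt candidate [4,6) then overlaps [5,7)
// and lands at 7, the first gap wide enough for the request.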

void ResourceSegments::add(ResourceSegments::IntervalTy A,
                           const unsigned CutOff) {
  assert(A.first <= A.second && "Cannot add negative resource usage");
  assert(CutOff > 0 && "0-size interval history has no use.");

  // Zero resource usage is allowed by TargetSchedule.td, in the case that the
  // instruction needed the resource to be available but does not use it.
  // However, a ResourceSegment interval is closed on the left and open on the
  // right, so an empty interval cannot be represented; do not add one.
  if (A.first == A.second)
    return;

  assert(all_of(_Intervals,
                [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
                  return !intersects(A, Interval);
                }) &&
         "A resource is being overwritten");
  _Intervals.push_back(A);

  sortAndMerge();

  // Do not keep the full history of the intervals, just the
  // latest #CutOff.
  while (_Intervals.size() > CutOff)
    _Intervals.pop_front();
}
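The CutOff trimming means only the most recent intervals are remembered per resource instance; the history depth is presumably the -misched-resource-cutoff option ("Number of intervals to track", default 10) declared earlier in this file.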

bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
                                  ResourceSegments::IntervalTy B) {
  assert(A.first <= A.second && "Invalid interval");
  assert(B.first <= B.second && "Invalid interval");

  // Share one boundary.
  if ((A.first == B.first) || (A.second == B.second))
    return true;

  // Full intersect: [    ***     )  B
  //                    [***)        A
  if ((A.first > B.first) && (A.second < B.second))
    return true;

  // Right intersect: [     ***)        B
  //                       [***      )  A
  if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
    return true;

  // Left intersect:      [***      )  B
  //                 [     ***)        A
  if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
    return true;

  return false;
}
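Standalone checks (plain STL; intersectsDemo is a local restatement, not the LLVM API) exercising the four cases above under the same closed-left/open-right convention; the B.second > B.first guard, always true given the asserts, is dropped here.

#include <cassert>
#include <cstdint>
#include <utility>

using IntervalTy = std::pair<int64_t, int64_t>;

// Mirrors the logic above for demonstration purposes.
static bool intersectsDemo(IntervalTy A, IntervalTy B) {
  if (A.first == B.first || A.second == B.second)
    return true; // shared boundary
  if (A.first > B.first && A.second < B.second)
    return true; // A fully inside B
  if (A.first > B.first && A.first < B.second && A.second > B.second)
    return true; // right overlap
  if (A.first < B.first && B.first < A.second)
    return true; // left overlap
  return false;
}

int main() {
  assert(intersectsDemo({1, 4}, {1, 2}));  // shared left boundary
  assert(intersectsDemo({2, 3}, {1, 4}));  // containment
  assert(intersectsDemo({3, 6}, {1, 4}));  // right overlap
  assert(intersectsDemo({1, 4}, {3, 6}));  // left overlap
  assert(!intersectsDemo({1, 2}, {3, 4})); // disjoint
  return 0;
}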

void ResourceSegments::sortAndMerge() {
  if (_Intervals.size() <= 1)
    return;

  // First sort the collection.
  _Intervals.sort(sortIntervals);

  // Can use std::next because the list has at least 2 elements.
  auto next = std::next(std::begin(_Intervals));
  auto E = std::end(_Intervals);
  for (; next != E; ++next) {
    if (std::prev(next)->second >= next->first) {
      next->first = std::prev(next)->first;
      _Intervals.erase(std::prev(next));
      continue;
    }
  }
}
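A minimal standalone sketch (plain STL; sortAndMergeDemo is a hypothetical name) of the pass above: sort by start cycle, then fuse any interval that touches or overlaps its predecessor. Note that std::list::erase of the previous node leaves the current iterator valid.

#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>

using IntervalTy = std::pair<int64_t, int64_t>;

static void sortAndMergeDemo(std::list<IntervalTy> &Intervals) {
  if (Intervals.size() <= 1)
    return;
  Intervals.sort([](const IntervalTy &A, const IntervalTy &B) {
    return A.first < B.first;
  });
  for (auto Next = std::next(Intervals.begin()), E = Intervals.end();
       Next != E; ++Next) {
    if (std::prev(Next)->second >= Next->first) { // touching or overlapping
      Next->first = std::prev(Next)->first;       // grow Next leftwards...
      Intervals.erase(std::prev(Next));           // ...and drop the old node
    }
  }
}

int main() {
  std::list<IntervalTy> I{{4, 6}, {1, 2}, {2, 4}};
  sortAndMergeDemo(I);
  // {1,2} touches {2,4}, which touches {4,6}: all fuse into [1, 6).
  assert(I.size() == 1 && I.front() == IntervalTy(1, 6));
  return 0;
}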
