LLVM: lib/CodeGen/MachineScheduler.cpp Source File
1//===- MachineScheduler.cpp - Machine Instruction Scheduler --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
51#include "llvm/Config/llvm-config.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
81 "misched-prera-direction", cl::Hidden,
82 cl::desc("Pre reg-alloc list scheduling direction"),
86 "Force top-down pre reg-alloc list scheduling"),
88 "Force bottom-up pre reg-alloc list scheduling"),
90 "Force bidirectional pre reg-alloc list scheduling")));
91
93 "misched-postra-direction", cl::Hidden,
94 cl::desc("Post reg-alloc list scheduling direction"),
98 "Force top-down post reg-alloc list scheduling"),
100 "Force bottom-up post reg-alloc list scheduling"),
102 "Force bidirectional post reg-alloc list scheduling")));
103
106 cl::desc("Print critical path length to stdout"));
107
110 cl::desc("Verify machine instrs before and after machine scheduling"));
111
112#ifndef NDEBUG
115 cl::desc("Pop up a window to show MISched dags after they are processed"));
117 cl::desc("Print schedule DAGs"));
120 cl::desc("Dump resource usage at schedule boundary."));
123 cl::desc("Show details of invoking getNextResoufceCycle."));
124#else
128#ifdef LLVM_ENABLE_DUMP
130#endif
131#endif
132
133}
134
135#ifndef NDEBUG
136
137
139 cl::desc("Hide nodes with more predecessor/successor than cutoff"));
140
142 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
143
145 cl::desc("Only schedule this function"));
147 cl::desc("Only schedule this MBB#"));
148#endif
149
150
151
153 cl::desc("Limit ready list to N instructions"), cl::init(256));
154
156 cl::desc("Enable register pressure scheduling."), cl::init(true));
157
159 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
160
162 cl::desc("Enable memop clustering."),
166 cl::desc("Switch to fast cluster algorithm with the lost "
167 "of some fusion opportunities"),
171 cl::desc("The threshold for fast cluster"),
173
174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
177 cl::desc("Dump resource usage at schedule boundary."));
180 cl::desc("Set width of the columns with "
181 "the resources and schedule units"),
185 cl::desc("Set width of the columns showing resource booking."),
189 cl::desc("Sort the resources printed in the dump trace"));
190#endif
191
195
196
198
199
200void MachineSchedStrategy::anchor() {}
201
202void ScheduleDAGMutation::anchor() {}
203
204
205
206
207
210}
211
214}
215
216namespace {
217
218
221public:
223
225
226protected:
228};
229
230
231class MachineScheduler : public MachineSchedulerBase {
232public:
233 MachineScheduler();
234
235 void getAnalysisUsage(AnalysisUsage &AU) const override;
236
238
239 static char ID;
240
241protected:
243};
244
245
246class PostMachineScheduler : public MachineSchedulerBase {
247public:
248 PostMachineScheduler();
249
250 void getAnalysisUsage(AnalysisUsage &AU) const override;
251
253
254 static char ID;
255
256protected:
258};
259
260}
261
262char MachineScheduler::ID = 0;
263
265
267 "Machine Instruction Scheduler", false, false)
275
276MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
278}
279
280void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
291}
292
293char PostMachineScheduler::ID = 0;
294
296
298 "PostRA Machine Instruction Scheduler", false, false)
304
305PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
307}
308
309void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
316}
317
320
321
322
324 return nullptr;
325}
326
327
332 cl::desc("Machine instruction scheduler to use"));
333
337
339 "enable-misched",
340 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
342
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
347
348
352 assert(I != Beg && "reached the top of the region, cannot decrement");
353 while (--I != Beg) {
354 if (->isDebugOrPseudoInstr())
355 break;
356 }
357 return I;
358}
359
360
366}
367
368
369
374 if (->isDebugOrPseudoInstr())
375 break;
376 }
377 return I;
378}
379
380
386}
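// Both helpers exist so that debug values never perturb region boundaries:
// a region must begin and end on real instructions, otherwise the same code
// would schedule differently with and without debug info.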
387
388
390
393 return Ctor(this);
394
395
399
400
402}
403
404
405
406
407ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
408
412
413
415}
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
435 return false;
436
439 return false;
441 return false;
442
444
445
446 MF = &mf;
447 MLI = &getAnalysis().getLI();
448 MDT = &getAnalysis().getDomTree();
449 PassConfig = &getAnalysis();
450 AA = &getAnalysis().getAAResults();
451
452 LIS = &getAnalysis().getLIS();
453
456 MF->verify(this, "Before machine scheduling.", &errs());
457 }
458 RegClassInfo->runOnMachineFunction(*MF);
459
460
461
462 std::unique_ptr Scheduler(createMachineScheduler());
463 scheduleRegions(*Scheduler, false);
464
467 MF->verify(this, "After machine scheduling.", &errs());
468 return true;
469}
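// In outline: gather the analyses the DAG builder needs (loops, dominators,
// alias analysis, live intervals), optionally verify the function, then
// hand every block to scheduleRegions() with a freshly created scheduler.
// PostMachineScheduler below mirrors this without the liveness pieces.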
470
471bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
473 return false;
474
477 return false;
479 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
480 return false;
481 }
483
484
485 MF = &mf;
486 MLI = &getAnalysis().getLI();
487 PassConfig = &getAnalysis();
488 AA = &getAnalysis().getAAResults();
489
491 MF->verify(this, "Before post machine scheduling.", &errs());
492
493
494
495 std::unique_ptr Scheduler(createPostMachineScheduler());
496 scheduleRegions(*Scheduler, true);
497
499 MF->verify(this, "After post machine scheduling.", &errs());
500 return true;
501}
502
503
504
505
506
507
508
509
510
511
512
518 MI->isFakeUse();
519}
520
521
522namespace {
523struct SchedRegion {
524
525
526
527
528
531 unsigned NumRegionInstrs;
532
534 unsigned N) :
535 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
536};
537}
538
540
541static void
544 bool RegionsTopDown) {
547
550 RegionEnd != MBB->begin(); RegionEnd = I) {
551
552
553 if (RegionEnd != MBB->end() ||
555 --RegionEnd;
556 }
557
558
559
560 unsigned NumRegionInstrs = 0;
561 I = RegionEnd;
565 break;
566 if (!MI.isDebugOrPseudoInstr()) {
567
568
569 ++NumRegionInstrs;
570 }
571 }
572
573
574
575 if (NumRegionInstrs != 0)
576 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
577 }
578
579 if (RegionsTopDown)
580 std::reverse(Regions.begin(), Regions.end());
581}
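// Illustration: in a block [ def, call, add, store ] the call is a
// scheduling boundary, so this yields the regions [def] and [add, store],
// collected bottom-up. Regions with no real instructions are dropped here;
// single-instruction regions are skipped later in scheduleRegions() since
// there is nothing to reorder.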
582
583
585 bool FixKillFlags) {
586
587
588
589
592
594
595#ifndef NDEBUG
597 continue;
600 continue;
601#endif
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
619 for (const SchedRegion &R : MBBRegions) {
622 unsigned NumRegionInstrs = R.NumRegionInstrs;
623
624
625
626 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
627
628
629 if (I == RegionEnd || I == std::prev(RegionEnd)) {
630
631
633 continue;
634 }
635 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
637 << " " << MBB->getName() << "\n From: " << *I
638 << " To: ";
639 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
640 else dbgs() << "End\n";
641 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
643 errs() << MF->getName();
646 }
647
648
649
651
652
654 }
656
657
658
659 if (FixKillFlags)
661 }
663}
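// The contract with the Scheduler object above is a fixed sequence:
//   startBlock(MBB) -> { enterRegion -> schedule -> exitRegion }* ->
//   finishBlock(), followed by fixupKills() when kill flags must be
// recomputed. Every ScheduleDAGInstrs subclass driven by this pass sees
// exactly that sequence.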
664
665void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
666
667}
668
669#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
671 dbgs() << "Queue " << Name << ": ";
672 for (const SUnit *SU : Queue)
673 dbgs() << SU->NodeNum << " ";
674 dbgs() << "\n";
675}
676#endif
677
678
679
680
681
682
683
684
686
687
688
689
690
693
694 if (SuccEdge->isWeak()) {
698 return;
699 }
700#ifndef NDEBUG
702 dbgs() << "*** Scheduling failed! ***\n";
704 dbgs() << " has been released too many times!\n";
706 }
707#endif
708
709
712
715 SchedImpl->releaseTopNode(SuccSU);
716}
717
718
722}
723
724
725
726
727
730
731 if (PredEdge->isWeak()) {
735 return;
736 }
737#ifndef NDEBUG
739 dbgs() << "*** Scheduling failed! ***\n";
741 dbgs() << " has been released too many times!\n";
743 }
744#endif
745
746
749
752 SchedImpl->releaseBottomNode(PredSU);
753}
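// releaseSucc()/releaseSuccessors() and releasePred()/releasePredecessors()
// are mirrors: the former feed the top-down ready queue, the latter the
// bottom-up one. Weak edges only decrement WeakPredsLeft/WeakSuccsLeft and
// never make a node ready on their own.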
754
755
759}
760
764}
765
769}
770
771
772
773
774
778 unsigned regioninstrs)
779{
781
783
784
786 if (SchedImpl->getPolicy().OnlyTopDown)
788 else if (SchedImpl->getPolicy().OnlyBottomUp)
790 else
793}
794
795
796
799
802
803
805
806
809
810
813}
814
816#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
819 return false;
820 }
821 ++NumInstrsScheduled;
822#endif
823 return true;
824}
825
826
827
828
829
831 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
833
834
836
838
841
845
846
847
849
850
852
853 bool IsTopNode = false;
854 while (true) {
855 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
857 if (!SU) break;
858
861 break;
862
864 if (IsTopNode) {
865 assert(SU->isTopReady() && "node still has unscheduled dependencies");
868 else
870 } else {
871 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
874 if (&*priorII == MI)
876 else {
881 }
882 }
883
884
885
886
887 SchedImpl->schedNode(SU, IsTopNode);
888
890 }
892
894
896 dbgs() << "*** Final schedule for "
899 dbgs() << '\n';
900 });
901}
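// ScheduleDAGMI::schedule() is plain list scheduling: build the DAG, apply
// mutations, initialize the queues, then loop { pick an SUnit, splice its
// instruction to the current top or bottom boundary, update the zones and
// release newly ready nodes } until both queues drain, and finally put the
// deferred debug values back.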
902
903
906 m->apply(this);
907}
908
913 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
914
915
916 SU.biasCriticalPath();
917
918
919 if (!SU.NumPredsLeft)
921
922 if (!SU.NumSuccsLeft)
924 }
926}
927
928
933
934
935
936
937
938 for (SUnit *SU : TopRoots)
940
941
942
944 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
946 }
947
950
952
953
956}
957
958
960
961 if (IsTopNode)
963 else
965
967}
968
969
971
975 }
976
977 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
979 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
987 }
988}
989
990#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
992
994
996 return;
997
998
1000 return;
1001
1002 dbgs() << " * Schedule table (TopDown):\n";
1008 if (!SU)
1009 continue;
1013 PI != PE; ++PI) {
1014 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1015 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1016 }
1017 }
1018
1020 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1022 dbgs() << "|\n";
1023
1026 if (!SU) {
1027 dbgs() << "Missing SUnit\n";
1028 continue;
1029 }
1030 std::string NodeName("SU(");
1031 NodeName += std::to_string(SU->NodeNum) + ")";
1033 unsigned C = FirstCycle;
1034 for (; C <= LastCycle; ++C) {
1037 else
1039 }
1040 dbgs() << "|\n";
1042
1046
1051 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1052 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1053 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1054 });
1056 C = FirstCycle;
1057 const std::string ResName =
1060 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1062 }
1063 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1066 while (C++ <= LastCycle)
1068
1069 dbgs() << "| \n";
1070 }
1071 }
1072}
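// The trace printed here (enabled with -misched-dump-schedule-trace) has
// one row per scheduled SUnit and one column per cycle, roughly:
//   SU(0)     | i |   |   |
//   ResX      | x | x |   |
// where 'i' marks the issue cycle and 'x' cycles in which the listed
// resource is booked (see scheduleTableLegend for the exact glyphs).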
1073
1075
1077 return;
1078
1079
1081 return;
1082
1083 dbgs() << " * Schedule table (BottomUp):\n";
1085
1090 if (!SU)
1091 continue;
1095 PI != PE; ++PI) {
1096 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1097 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1098 }
1099 }
1100
1102 for (int C = FirstCycle; C >= LastCycle; --C)
1104 dbgs() << "|\n";
1105
1108 if (!SU) {
1109 dbgs() << "Missing SUnit\n";
1110 continue;
1111 }
1112 std::string NodeName("SU(");
1113 NodeName += std::to_string(SU->NodeNum) + ")";
1115 int C = FirstCycle;
1116 for (; C >= LastCycle; --C) {
1119 else
1121 }
1122 dbgs() << "|\n";
1127
1132 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1133 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1134 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1135 });
1137 C = FirstCycle;
1138 const std::string ResName =
1141 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1143 }
1144 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1147 while (C-- >= LastCycle)
1149
1150 dbgs() << "| \n";
1151 }
1152 }
1153}
1154#endif
1155
1156#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1164 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1165 } else {
1166 dbgs() << "* Schedule table: DumpDirection not set.\n";
1167 }
1168 }
1169
1173 else
1174 dbgs() << "Missing SUnit\n";
1175 }
1176}
1177#endif
1178
1179
1180
1181
1182
1183
1186}
1187
1191 if (!MO.isReg())
1192 continue;
1193 if (!MO.readsReg())
1194 continue;
1196 continue;
1197
1199 if (!Reg.isVirtual())
1200 continue;
1201
1202
1204 bool FoundDef = false;
1206 if (MO2.getReg() == Reg && !MO2.isDead()) {
1207 FoundDef = true;
1208 break;
1209 }
1210 }
1211 if (FoundDef)
1212 continue;
1213 }
1214
1215
1218 if (UI->SU == &SU)
1219 break;
1220 }
1223 }
1224}
1225
1226
1227
1228
1229
1233 unsigned regioninstrs)
1234{
1235
1237
1238
1240
1242
1245
1247 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1248}
1249
1250
1251
1257
1262
1263
1265
1267
1268
1271
1272
1273
1274
1277
1283 };
1284
1285
1286
1288
1289
1294 }
1295
1298 dbgs() << "Bottom Pressure:\n";
1300
1304 "Can't find the region bottom");
1305
1306
1307
1311 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1317 }
1318 }
1323 dbgs() << "\n");
1324}
1325
1328 const std::vector &NewMaxPressure) {
1332 if (!PC.isValid())
1333 break;
1334 unsigned ID = PC.getPSet();
1336 ++CritIdx;
1339 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1341 }
1343 if (NewMaxPressure[ID] >= Limit - 2) {
1345 << NewMaxPressure[ID]
1346 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1348 << " livethru)\n");
1349 }
1350 }
1351}
1352
1353
1354
1359
1360 if (!Reg.isVirtual())
1361 continue;
1362
1364
1365
1366
1367
1368 bool Decrement = P.LaneMask.any();
1369
1372 SUnit &SU = *V2SU.SU;
1374 continue;
1375
1382 }
1383 } else {
1384 assert(P.LaneMask.any());
1386
1387
1388
1389
1396 else {
1399 }
1400
1401 assert(VNI && "No live value at use.");
1404 SUnit *SU = V2SU.SU;
1405
1406
1410 if (LRQ.valueIn() == VNI) {
1416 }
1417 }
1418 }
1419 }
1420 }
1421}
1422
1424#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1430 dbgs() << " Pressure Diff : ";
1432 }
1433 dbgs() << " Single Issue : ";
1436 dbgs() << "true;";
1437 else
1438 dbgs() << "false;";
1439 dbgs() << '\n';
1440 }
1443#endif
1444}
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1457 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1460
1462
1465
1466
1467
1469
1473
1474
1476
1477 bool IsTopNode = false;
1478 while (true) {
1479 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1481 if (!SU) break;
1482
1485 break;
1486
1488
1494 SchedImpl->scheduleTree(SubtreeID);
1495 }
1496 }
1497
1498
1499 SchedImpl->schedNode(SU, IsTopNode);
1500
1502 }
1504
1506
1508 dbgs() << "*** Final schedule for "
1511 dbgs() << '\n';
1512 });
1513}
1514
1515
1521 return;
1522 }
1523
1524
1527
1528
1531
1532
1534
1535
1537}
1538
1547}
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1576
1578 return 0;
1579
1580 unsigned MaxCyclicLatency = 0;
1581
1584 if (!Reg.isVirtual())
1585 continue;
1588 if (!DefVNI)
1589 continue;
1590
1593 if (!DefSU)
1594 continue;
1595
1596 unsigned LiveOutHeight = DefSU->getHeight();
1597 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1598
1601 SUnit *SU = V2SU.SU;
1603 continue;
1604
1605
1608 continue;
1609
1610
1611
1612
1613 unsigned CyclicLatency = 0;
1614 if (LiveOutDepth > SU->getDepth())
1615 CyclicLatency = LiveOutDepth - SU->getDepth();
1616
1618 if (LiveInHeight > LiveOutHeight) {
1619 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1620 CyclicLatency = LiveInHeight - LiveOutHeight;
1621 } else
1622 CyclicLatency = 0;
1623
1625 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1626 if (CyclicLatency > MaxCyclicLatency)
1627 MaxCyclicLatency = CyclicLatency;
1628 }
1629 }
1630 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1631 return MaxCyclicLatency;
1632}
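// Worked example (unit latencies; each instruction defines an eponymous
// vreg) for a single-block loop a -> b(a,c) -> c(b) -> d(c) -> exit: the
// acyclic critical path a->b->c->d is 4 cycles, but the recurrence b->c->b
// limits steady state to 2 cycles per iteration:
//   LiveOutDepth(c) - LiveInDepth(b)   = 3 - 1 = 2
//   LiveInHeight(b) - LiveOutHeight(c) = 4 - 2 = 2
//   CyclicCriticalPath = min(2, 2)     = 2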
1633
1634
1635
1642 }
1643}
1644
1645
1647
1649
1650 if (IsTopNode) {
1651 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1654 else {
1657 }
1658
1660
1663 false);
1665
1668 } else {
1669
1671 }
1672
1677
1679 }
1680 } else {
1681 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1684 if (&*priorII == MI)
1686 else {
1690 }
1694 }
1698 false);
1700
1703 } else {
1704
1706 }
1707
1715
1718 }
1719 }
1720}
1721
1722
1723
1724
1725
1726namespace {
1727
1728
1729
1731 struct MemOpInfo {
1736 bool OffsetIsScalable;
1737
1740 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
1741 OffsetIsScalable(OffsetIsScalable) {}
1742
1745 if (A->getType() != B->getType())
1746 return A->getType() < B->getType();
1747 if (A->isReg())
1748 return A->getReg() < B->getReg();
1749 if (A->isFI()) {
1750 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1754 return StackGrowsDown ? A->getIndex() > B->getIndex()
1755 : A->getIndex() < B->getIndex();
1756 }
1757
1758 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1759 "index bases.");
1760 }
1761
1762 bool operator<(const MemOpInfo &RHS) const {
1763
1764
1765 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1766 RHS.BaseOps.begin(), RHS.BaseOps.end(),
1767 Compare))
1768 return true;
1769 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1770 BaseOps.begin(), BaseOps.end(), Compare))
1771 return false;
1774 return SU->NodeNum < RHS.SU->NodeNum;
1775 }
1776 };
1777
1780 bool IsLoad;
1781 bool ReorderWhileClustering;
1782
1783public:
1786 bool ReorderWhileClustering)
1787 : TII(tii), TRI(tri), IsLoad(IsLoad),
1788 ReorderWhileClustering(ReorderWhileClustering) {}
1789
1791
1792protected:
1793 void clusterNeighboringMemOps(ArrayRef MemOps, bool FastCluster,
1795 void collectMemOpRecords(std::vector &SUnits,
1799};
1800
1801class StoreClusterMutation : public BaseMemOpClusterMutation {
1802public:
1805 bool ReorderWhileClustering)
1806 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1807};
1808
1809class LoadClusterMutation : public BaseMemOpClusterMutation {
1810public:
1812 bool ReorderWhileClustering)
1813 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1814};
1815
1816}
1817
1818namespace llvm {
1819
1820std::unique_ptr
1823 bool ReorderWhileClustering) {
1825 TII, TRI, ReorderWhileClustering)
1826 : nullptr;
1827}
1828
1829std::unique_ptr
1832 bool ReorderWhileClustering) {
1834 TII, TRI, ReorderWhileClustering)
1835 : nullptr;
1836}
1837
1838}
1839
1840
1841
1842
1843
1844
1845void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1848
1850
1851
1852
1853 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1854
1855 auto MemOpa = MemOpRecords[Idx];
1856
1857
1858 unsigned NextIdx = Idx + 1;
1859 for (; NextIdx < End; ++NextIdx)
1860
1861
1862 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1863 (FastCluster ||
1864 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1865 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1866 break;
1867 if (NextIdx == End)
1868 continue;
1869
1870 auto MemOpb = MemOpRecords[NextIdx];
1871 unsigned ClusterLength = 2;
1872 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1873 MemOpb.Width.getValue().getKnownMinValue();
1874 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1875 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1876 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1877 MemOpb.Width.getValue().getKnownMinValue();
1878 }
1879
1880 if (->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1881 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
1882 MemOpb.Offset, MemOpb.OffsetIsScalable,
1883 ClusterLength, CurrentClusterBytes))
1884 continue;
1885
1886 SUnit *SUa = MemOpa.SU;
1887 SUnit *SUb = MemOpb.SU;
1888 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1890
1891
1893 continue;
1894
1896 << SUb->NodeNum << ")\n");
1897 ++NumClustered;
1898
1899 if (IsLoad) {
1900
1901
1902
1903
1904 for (const SDep &Succ : SUa->Succs) {
1906 continue;
1908 << ")\n");
1910 }
1911 } else {
1912
1913
1914
1915
1916
1917
1918 for (const SDep &Pred : SUb->Preds) {
1920 continue;
1922 << ")\n");
1924 }
1925 }
1926
1927 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1928 CurrentClusterBytes};
1929
1930 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1931 << ", Curr cluster bytes: " << CurrentClusterBytes
1932 << "\n");
1933 }
1934}
1935
1936void BaseMemOpClusterMutation::collectMemOpRecords(
1938 for (auto &SU : SUnits) {
1941 continue;
1942
1946 bool OffsetIsScalable;
1949 OffsetIsScalable, Width, TRI)) {
1951 continue;
1952
1954 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1955
1956 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1957 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1958 << ", Width: " << Width << "\n");
1959 }
1960#ifndef NDEBUG
1961 for (const auto *Op : BaseOps)
1963#endif
1964 }
1965}
1966
1967bool BaseMemOpClusterMutation::groupMemOps(
1970 bool FastCluster =
1973
1974 for (const auto &MemOp : MemOps) {
1975 unsigned ChainPredID = DAG->SUnits.size();
1976 if (FastCluster) {
1977 for (const SDep &Pred : MemOp.SU->Preds) {
1978
1979
1980
1981 if ((Pred.isCtrl() &&
1982 (IsLoad ||
1986 break;
1987 }
1988 }
1989 } else
1990 ChainPredID = 0;
1991
1993 }
1994 return FastCluster;
1995}
1996
1997
1999
2001 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2002
2003 if (MemOpRecords.size() < 2)
2004 return;
2005
2006
2007
2008
2010 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2011
2012 for (auto &Group : Groups) {
2013
2014
2016
2017
2018 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2019 }
2020}
2021
2022
2023
2024
2025
2026namespace {
2027
2028
2029
2030
2032
2034
2035
2036
2038
2039public:
2041
2043
2044protected:
2046};
2047
2048}
2049
2050namespace llvm {
2051
2052std::unique_ptr
2055 return std::make_unique(TII, TRI);
2056}
2057
2058}
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2082
2083
2087 return;
2088
2092 return;
2093
2094
2095
2096
2097
2098
2099
2100 unsigned LocalReg = SrcReg;
2101 unsigned GlobalReg = DstReg;
2103 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2104 LocalReg = DstReg;
2105 GlobalReg = SrcReg;
2107 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2108 return;
2109 }
2111
2112
2114
2115
2116
2117
2118 if (GlobalSegment == GlobalLI->end())
2119 return;
2120
2121
2122
2123
2124
2125 if (GlobalSegment->contains(LocalLI->beginIndex()))
2126 ++GlobalSegment;
2127
2128 if (GlobalSegment == GlobalLI->end())
2129 return;
2130
2131
2132 if (GlobalSegment != GlobalLI->begin()) {
2133
2135 GlobalSegment->start)) {
2136 return;
2137 }
2138
2139
2142 return;
2143 }
2144
2145
2146 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2147 "Disconnected LRG within the scheduling region.");
2148 }
2150 if (!GlobalDef)
2151 return;
2152
2154 if (!GlobalSU)
2155 return;
2156
2157
2158
2162 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2163 for (const SDep &Succ : LastLocalSU->Succs) {
2165 continue;
2166 if (Succ.getSUnit() == GlobalSU)
2167 continue;
2169 return;
2171 }
2172
2173
2177 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2178 for (const SDep &Pred : GlobalSU->Preds) {
2180 continue;
2181 if (Pred.getSUnit() == FirstLocalSU)
2182 continue;
2184 return;
2186 }
2188
2189 for (SUnit *LU : LocalUses) {
2190 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2191 << GlobalSU->NodeNum << ")\n");
2193 }
2194 for (SUnit *GU : GlobalUses) {
2195 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2196 << FirstLocalSU->NodeNum << ")\n");
2198 }
2199}
2200
2201
2202
2206
2208 if (FirstPos == DAG->end())
2209 return;
2213
2216 continue;
2217
2219 }
2220}
2221
2222
2223
2224
2225
2226
2228
2230
2231
2232
2233
2234
2236 unsigned Latency, bool AfterSchedNode) {
2237 int ResCntFactor = (int)(Count - (Latency * LFactor));
2238 if (AfterSchedNode)
2239 return ResCntFactor >= (int)LFactor;
2240 else
2241 return ResCntFactor > (int)LFactor;
2242}
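// Example: on a 2-wide target (LFactor = 2), Count = 10 resource units
// against Latency = 4 cycles gives ResCntFactor = 10 - 4*2 = 2. After
// scheduling a node this is resource limited (2 >= 2); before, it is not
// (2 > 2 fails), which is the asymmetry the AfterSchedNode flag encodes.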
2243
2245
2246
2247
2251 }
2254 CheckPending = false;
2255 CurrCycle = 0;
2256 CurrMOps = 0;
2257 MinReadyCycle = std::numeric_limits::max();
2258 ExpectedLatency = 0;
2259 DependentLatency = 0;
2260 RetiredMOps = 0;
2261 MaxExecutedResCount = 0;
2262 ZoneCritResIdx = 0;
2263 IsResourceLimited = false;
2264 ReservedCycles.clear();
2265 ReservedResourceSegments.clear();
2266 ReservedCyclesIndex.clear();
2267 ResourceGroupSubUnitMasks.clear();
2268#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2269
2270
2271
2272 MaxObservedStall = 0;
2273#endif
2274
2275 ExecutedResCounts.resize(1);
2276 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2277}
2278
2283 return;
2292 unsigned PIdx = PI->ProcResourceIdx;
2294 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2296 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2297 }
2298 }
2299}
2300
2304 DAG = dag;
2309 ReservedCyclesIndex.resize(ResourceCount);
2310 ExecutedResCounts.resize(ResourceCount);
2311 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2312 unsigned NumUnits = 0;
2313
2314 for (unsigned i = 0; i < ResourceCount; ++i) {
2315 ReservedCyclesIndex[i] = NumUnits;
2320 U != UE; ++U)
2321 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2322 }
2323 }
2324
2325 ReservedCycles.resize(NumUnits, InvalidCycle);
2326 }
2327}
2328
2329
2330
2331
2332
2333
2334
2335
2338 return 0;
2339
2341 if (ReadyCycle > CurrCycle)
2342 return ReadyCycle - CurrCycle;
2343 return 0;
2344}
2345
2346
2347
2349 unsigned ReleaseAtCycle,
2350 unsigned AcquireAtCycle) {
2353 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2354 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2355
2356 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2357 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2358 }
2359
2360 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2361
2363 return CurrCycle;
2364
2366 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2367 return NextUnreserved;
2368}
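// A resource instance that has never been reserved still holds InvalidCycle
// and is reported free at CurrCycle, so buffered (pipelined) resources never
// block here; only unbuffered resources carry their last reservation
// forward, and the ResourceSegments paths above apply only when
// interval-based resource booking is enabled.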
2369
2370
2371
2372
2373std::pair<unsigned, unsigned>
2375 unsigned ReleaseAtCycle,
2376 unsigned AcquireAtCycle) {
2378 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2380 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2381 }
2383 unsigned InstanceIdx = 0;
2384 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2386 assert(NumberOfInstances > 0 &&
2387 "Cannot have zero instances of a ProcResource");
2388
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2404 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2406 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2407 StartIndex);
2408
2410 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2411 unsigned NextUnreserved, NextInstanceIdx;
2412 std::tie(NextUnreserved, NextInstanceIdx) =
2414 if (MinNextUnreserved > NextUnreserved) {
2415 InstanceIdx = NextInstanceIdx;
2416 MinNextUnreserved = NextUnreserved;
2417 }
2418 }
2419 return std::make_pair(MinNextUnreserved, InstanceIdx);
2420 }
2421
2422 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2423 ++I) {
2424 unsigned NextUnreserved =
2427 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2428 << NextUnreserved << "c\n");
2429 if (MinNextUnreserved > NextUnreserved) {
2430 InstanceIdx = I;
2431 MinNextUnreserved = NextUnreserved;
2432 }
2433 }
2436 << "[" << InstanceIdx - StartIndex << "]"
2437 << " available @" << MinNextUnreserved << "c"
2438 << "\n");
2439 return std::make_pair(MinNextUnreserved, InstanceIdx);
2440}
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2458 return true;
2459 }
2460
2465 return true;
2466 }
2467
2468 if (CurrMOps > 0 &&
2472 << (isTop() ? "begin" : "end") << " group\n");
2473 return true;
2474 }
2475
2481 unsigned ResIdx = PE.ProcResourceIdx;
2482 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2483 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2484 unsigned NRCycle, InstanceIdx;
2485 std::tie(NRCycle, InstanceIdx) =
2487 if (NRCycle > CurrCycle) {
2488#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2489 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2490#endif
2493 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2494 << "=" << NRCycle << "c\n");
2495 return true;
2496 }
2497 }
2498 }
2499 return false;
2500}
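// Taken together, the checks above say a node cannot issue at CurrCycle if
// the target's hazard recognizer objects, if its micro-ops do not fit the
// current issue group (or it must begin/end a group), or if an unbuffered
// resource it needs is still reserved by an earlier instruction.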
2501
2502
2505 SUnit *LateSU = nullptr;
2506 unsigned RemLatency = 0;
2507 for (SUnit *SU : ReadySUs) {
2509 if (L > RemLatency) {
2510 RemLatency = L;
2511 LateSU = SU;
2512 }
2513 }
2514 if (LateSU) {
2516 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2517 }
2518 return RemLatency;
2519}
2520
2521
2522
2523
2526 OtherCritIdx = 0;
2528 return 0;
2529
2535 PIdx != PEnd; ++PIdx) {
2537 if (OtherCount > OtherCritCount) {
2538 OtherCritCount = OtherCount;
2539 OtherCritIdx = PIdx;
2540 }
2541 }
2542 if (OtherCritIdx) {
2547 }
2548 return OtherCritCount;
2549}
2550
2552 unsigned Idx) {
2553 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2554
2555#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2556
2557
2558
2559 if (ReadyCycle > CurrCycle)
2560 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2561#endif
2562
2563 if (ReadyCycle < MinReadyCycle)
2564 MinReadyCycle = ReadyCycle;
2565
2566
2567
2569 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2571
2572 if (!HazardDetected) {
2574
2575 if (InPQueue)
2577 return;
2578 }
2579
2580 if (!InPQueue)
2582}
2583
2584
2587 assert(MinReadyCycle < std::numeric_limits::max() &&
2588 "MinReadyCycle uninitialized");
2589 if (MinReadyCycle > NextCycle)
2590 NextCycle = MinReadyCycle;
2591 }
2592
2594 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2595
2596
2597 if ((NextCycle - CurrCycle) > DependentLatency)
2598 DependentLatency = 0;
2599 else
2600 DependentLatency -= (NextCycle - CurrCycle);
2601
2603
2604 CurrCycle = NextCycle;
2605 } else {
2606
2607 for (; CurrCycle != NextCycle; ++CurrCycle) {
2610 else
2612 }
2613 }
2614 CheckPending = true;
2615 IsResourceLimited =
2618
2620 << '\n');
2621}
2622
2624 ExecutedResCounts[PIdx] += Count;
2625 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2626 MaxExecutedResCount = ExecutedResCounts[PIdx];
2627}
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2640 unsigned ReleaseAtCycle,
2641 unsigned NextCycle,
2642 unsigned AcquireAtCycle) {
2644 unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2646 << ReleaseAtCycle << "x" << Factor << "u\n");
2647
2648
2652
2653
2654
2656 ZoneCritResIdx = PIdx;
2660 << "c\n");
2661 }
2662
2663 unsigned NextAvailable, InstanceIdx;
2664 std::tie(NextAvailable, InstanceIdx) =
2666 if (NextAvailable > CurrCycle) {
2669 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2670 << " reserved until @" << NextAvailable << "\n");
2671 }
2672 return NextAvailable;
2673}
2674
2675
2677
2680
2681
2683 }
2685
2686 CheckPending = true;
2687 }
2688
2689
2694 "Cannot schedule this instruction's MicroOps in the current cycle.");
2695
2697 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2698
2699 unsigned NextCycle = CurrCycle;
2701 case 0:
2702 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2703 break;
2704 case 1:
2705 if (ReadyCycle > NextCycle) {
2706 NextCycle = ReadyCycle;
2707 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2708 }
2709 break;
2710 default:
2711
2712
2713
2714
2715 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2716 NextCycle = ReadyCycle;
2717 break;
2718 }
2719 RetiredMOps += IncMOps;
2720
2721
2726 if (ZoneCritResIdx) {
2727
2728 unsigned ScaledMOps =
2730
2731
2732
2735 ZoneCritResIdx = 0;
2736 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2738 << "c\n");
2739 }
2740 }
2744 unsigned RCycle =
2745 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2746 PI->AcquireAtCycle);
2747 if (RCycle > NextCycle)
2748 NextCycle = RCycle;
2749 }
2751
2752
2753
2754
2758 unsigned PIdx = PI->ProcResourceIdx;
2760
2762 unsigned ReservedUntil, InstanceIdx;
2764 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2766 ReservedResourceSegments[InstanceIdx].add(
2768 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2770 } else {
2771 ReservedResourceSegments[InstanceIdx].add(
2773 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2775 }
2776 } else {
2777
2778 unsigned ReservedUntil, InstanceIdx;
2780 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2782 ReservedCycles[InstanceIdx] =
2783 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2784 } else
2785 ReservedCycles[InstanceIdx] = NextCycle;
2786 }
2787 }
2788 }
2789 }
2790 }
2791
2792 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2793 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2794 if (SU->getDepth() > TopLatency) {
2795 TopLatency = SU->getDepth();
2797 << SU->NodeNum << ") " << TopLatency << "c\n");
2798 }
2799 if (SU->getHeight() > BotLatency) {
2802 << SU->NodeNum << ") " << BotLatency << "c\n");
2803 }
2804
2805 if (NextCycle > CurrCycle)
2807 else
2808
2809
2810 IsResourceLimited =
2813
2814
2815
2816
2817
2818 CurrMOps += IncMOps;
2819
2820
2821
2822
2823
2827 << " group\n");
2829 }
2830
2832 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2833 << CurrCycle << '\n');
2835 }
2837}
2838
2839
2840
2842
2844 MinReadyCycle = std::numeric_limits::max();
2845
2846
2847
2848 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2851
2852 if (ReadyCycle < MinReadyCycle)
2853 MinReadyCycle = ReadyCycle;
2854
2856 break;
2857
2860 --I;
2861 --E;
2862 }
2863 }
2864 CheckPending = false;
2865}
2866
2867
2871 else {
2874 }
2875}
2876
2877
2878
2879
2881 if (CheckPending)
2883
2884
2889 continue;
2890 }
2891 ++I;
2892 }
2894
2895
2896
2897 (void)i;
2900 }
2901
2904
2907 return nullptr;
2908}
2909
2910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2911
2912
2913
2914
2917 return;
2918
2920 unsigned StartIdx = 0;
2921
2922 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2925 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2926 dbgs() << ResName << "(" << UnitIdx << ") = ";
2928 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2929 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2930 else
2931 dbgs() << "{ }\n";
2932 } else
2933 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2934 }
2935 StartIdx += NumUnits;
2936 }
2937}
2938
2939
2940
2942 unsigned ResFactor;
2943 unsigned ResCount;
2944 if (ZoneCritResIdx) {
2947 } else {
2949 ResCount = RetiredMOps * ResFactor;
2950 }
2953 << " Retired: " << RetiredMOps;
2955 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2956 << ResCount / ResFactor << " "
2958 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2959 << (IsResourceLimited ? " - Resource" : " - Latency")
2960 << " limited.\n";
2963}
2964#endif
2965
2966
2967
2968
2969
2974 return;
2975
2984 }
2985}
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3005 RemLatency = std::max(RemLatency,
3007 RemLatency = std::max(RemLatency,
3009 return RemLatency;
3010}
3011
3012
3013
3014bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3016 bool ComputeRemLatency,
3017 unsigned &RemLatency) const {
3018
3019
3021 return true;
3022
3023
3025 return false;
3026
3027 if (ComputeRemLatency)
3029
3031}
3032
3033
3034
3038
3039
3040
3041
3042
3043 unsigned OtherCritIdx = 0;
3044 unsigned OtherCount =
3046
3047 bool OtherResLimited = false;
3048 unsigned RemLatency = 0;
3049 bool RemLatencyComputed = false;
3052 RemLatencyComputed = true;
3054 OtherCount, RemLatency, false);
3055 }
3056
3057
3058
3059
3060 if (!OtherResLimited &&
3061 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3062 RemLatency))) {
3065 << " RemainingLatency " << RemLatency << " + "
3066 << CurrZone.getCurrCycle() << "c > CritPath "
3068 }
3069
3071 return;
3072
3074 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3075 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3076 } if (OtherResLimited) dbgs()
3077 << " RemainingLimit: "
3080 << " Latency limited both directions.\n");
3081
3084
3085 if (OtherResLimited)
3087}
3088
3089#ifndef NDEBUG
3092 switch (Reason) {
3093 case NoCand: return "NOCAND ";
3094 case Only1: return "ONLY1 ";
3095 case PhysReg: return "PHYS-REG ";
3096 case RegExcess: return "REG-EXCESS";
3098 case Stall: return "STALL ";
3099 case Cluster: return "CLUSTER ";
3100 case Weak: return "WEAK ";
3101 case RegMax: return "REG-MAX ";
3109 case NodeOrder: return "ORDER ";
3110 };
3112}
3113
3116 unsigned ResIdx = 0;
3118 switch (Cand.Reason) {
3119 default:
3120 break;
3123 break;
3126 break;
3129 break;
3132 break;
3135 break;
3138 break;
3141 break;
3144 break;
3147 break;
3148 }
3150 if (P.isValid())
3152 << ":" << P.getUnitInc() << " ";
3153 else
3154 dbgs() << " ";
3155 if (ResIdx)
3157 else
3158 dbgs() << " ";
3161 else
3162 dbgs() << " ";
3163 dbgs() << '\n';
3164}
3165#endif
3166
3167namespace llvm {
3168
3169
3170
3175 if (TryVal < CandVal) {
3176 TryCand.Reason = Reason;
3177 return true;
3178 }
3179 if (TryVal > CandVal) {
3180 if (Cand.Reason > Reason)
3181 Cand.Reason = Reason;
3182 return true;
3183 }
3184 return false;
3185}
3186
3191 if (TryVal > CandVal) {
3192 TryCand.Reason = Reason;
3193 return true;
3194 }
3195 if (TryVal < CandVal) {
3196 if (Cand.Reason > Reason)
3197 Cand.Reason = Reason;
3198 return true;
3199 }
3200 return false;
3201}
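// tryLess/tryGreater implement one rung of a tie-break ladder: they return
// true when the comparison is decisive in either direction (recording the
// Reason for the winner) and false on a tie, so the caller falls through to
// the next, weaker heuristic.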
3202
3206 if (Zone.isTop()) {
3207
3208
3209
3214 return true;
3215 }
3218 return true;
3219 } else {
3220
3221
3222
3227 return true;
3228 }
3231 return true;
3232 }
3233 return false;
3234}
3235}
3236
3238 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3240}
3241
3244}
3245
3248 "(PreRA)GenericScheduler needs vreg liveness");
3252
3254 DAG->computeDFSResult();
3255
3259
3260
3261
3262
3263
3265 if (!Top.HazardRec) {
3267 }
3268 if (!Bot.HazardRec) {
3270 }
3271 TopCand.SU = nullptr;
3272 BotCand.SU = nullptr;
3273}
3274
3275
3278 unsigned NumRegionInstrs) {
3281
3282
3283
3284
3285
3287 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3293 break;
3294 }
3295 }
3296
3297
3298
3300
3301
3303
3304
3308 }
3309
3319 }
3320}
3321
3323
3324#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3325 dbgs() << "GenericScheduler RegionPolicy: "
3329 << "\n";
3330#endif
3331}
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3344 return;
3345
3346
3347 unsigned IterCount =
3350
3352
3353 unsigned InFlightCount =
3354 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3355 unsigned BufferLimit =
3357
3359
3361 dbgs() << "IssueCycles="
3364 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3368}
3369
3372
3373
3374 for (const SUnit *SU : Bot.Available) {
3377 }
3381 }
3382
3385 checkAcyclicLatency();
3386 }
3387}
3388
3389namespace llvm {
3397
3398
3400 Reason)) {
3401 return true;
3402 }
3403
3404
3406 return false;
3407
3408
3409
3412 if (TryPSet == CandPSet) {
3414 Reason);
3415 }
3416
3417 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3418 std::numeric_limits::max();
3419
3420 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3421 std::numeric_limits::max();
3422
3423
3426 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3427}
3428
3431}
3432
3433
3434
3435
3436
3437
3438
3439
3442
3443 if (MI->isCopy()) {
3444 unsigned ScheduledOper = isTop ? 1 : 0;
3445 unsigned UnscheduledOper = isTop ? 0 : 1;
3446
3447
3448 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3449 return 1;
3450
3451
3453 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3454 return AtBoundary ? -1 : 1;
3455 }
3456
3457 if (MI->isMoveImmediate()) {
3458
3459
3460
3461 bool DoBias = true;
3463 if (Op.isReg() && !Op.getReg().isPhysical()) {
3464 DoBias = false;
3465 break;
3466 }
3467 }
3468
3469 if (DoBias)
3470 return isTop ? -1 : 1;
3471 }
3472
3473 return 0;
3474}
3475}
3476
3478 bool AtTop,
3481 Cand.SU = SU;
3482 Cand.AtTop = AtTop;
3483 if (DAG->isTrackingPressure()) {
3484 if (AtTop) {
3488 DAG->getRegionCriticalPSets(),
3489 DAG->getRegPressure().MaxSetPressure);
3490 } else {
3494 &DAG->getPressureDiff(Cand.SU),
3496 DAG->getRegionCriticalPSets(),
3497 DAG->getRegPressure().MaxSetPressure);
3498 } else {
3501 DAG->getPressureDiff(Cand.SU),
3503 DAG->getRegionCriticalPSets(),
3504 DAG->getRegPressure().MaxSetPressure);
3505 }
3506 }
3507 }
3509 << " Try SU(" << Cand.SU->NodeNum << ") "
3512}
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3528
3531 return true;
3532 }
3533
3534
3538
3539
3543 DAG->MF))
3545
3546
3550 DAG->MF))
3552
3553
3554
3555
3556
3557
3558 bool SameBoundary = Zone != nullptr;
3559 if (SameBoundary) {
3560
3561
3562
3566
3567
3571 }
3572
3573
3574
3575
3576
3577
3578
3579 const SUnit *CandNextClusterSU =
3581 const SUnit *TryCandNextClusterSU =
3583 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3584 Cand.SU == CandNextClusterSU,
3585 TryCand, Cand, Cluster))
3587
3588 if (SameBoundary) {
3589
3592 TryCand, Cand, Weak))
3594 }
3595
3596
3600 DAG->MF))
3602
3603 if (SameBoundary) {
3604
3613
3614
3615
3619
3620
3624 return true;
3625 }
3626 }
3627
3628 return false;
3629}
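// The ladder above, in order: physreg bias, excess then critical register
// pressure, stall cycles, cluster mates, weak edges, max pressure, resource
// balance plus latency, and finally the original node order -- the first
// decisive comparison wins.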
3630
3631
3632
3633
3634
3635
3640
3642
3644 for (SUnit *SU : Q) {
3645
3647 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3648
3650 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3651
3656 }
3657 }
3658}
3659
3660
3662
3663
3664 if (SUnit *SU = Bot.pickOnlyChoice()) {
3665 IsTopNode = false;
3667 return SU;
3668 }
3669 if (SUnit *SU = Top.pickOnlyChoice()) {
3670 IsTopNode = true;
3672 return SU;
3673 }
3674
3675
3677 setPolicy(BotPolicy, false, Bot, &Top);
3678
3679
3681 setPolicy(TopPolicy, false, Top, &Bot);
3682
3683
3685 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3686 BotCand.Policy != BotPolicy) {
3688 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3689 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3690 } else {
3692#ifndef NDEBUG
3696 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3697 assert(TCand.SU == BotCand.SU &&
3698 "Last pick result should correspond to re-picking right now");
3699 }
3700#endif
3701 }
3702
3703
3705 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3706 TopCand.Policy != TopPolicy) {
3708 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3709 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3710 } else {
3712#ifndef NDEBUG
3716 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3717 assert(TCand.SU == TopCand.SU &&
3718 "Last pick result should correspond to re-picking right now");
3719 }
3720#endif
3721 }
3722
3723
3724 assert(BotCand.isValid());
3725 assert(TopCand.isValid());
3728 if (tryCandidate(Cand, TopCand, nullptr)) {
3731 }
3732
3733 IsTopNode = Cand.AtTop;
3735 return Cand.SU;
3736}
3737
3738
3740 if (DAG->top() == DAG->bottom()) {
3741 assert(Top.Available.empty() && Top.Pending.empty() &&
3742 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3743 return nullptr;
3744 }
3746 do {
3748 SU = Top.pickOnlyChoice();
3749 if (!SU) {
3751 TopCand.reset(NoPolicy);
3752 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3753 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3755 SU = TopCand.SU;
3756 }
3757 IsTopNode = true;
3759 SU = Bot.pickOnlyChoice();
3760 if (!SU) {
3762 BotCand.reset(NoPolicy);
3763 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3764 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3766 SU = BotCand.SU;
3767 }
3768 IsTopNode = false;
3769 } else {
3770 SU = pickNodeBidirectional(IsTopNode);
3771 }
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3790 Top.removeReady(SU);
3792 Bot.removeReady(SU);
3793
3796 return SU;
3797}
3798
3801 if (!isTop)
3802 ++InsertPos;
3804
3805
3806
3807 for (SDep &Dep : Deps) {
3810 continue;
3811 SUnit *DepSU = Dep.getSUnit();
3812 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3813 continue;
3815 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3816 continue;
3818 DAG->dumpNode(*Dep.getSUnit()));
3820 }
3821}
3822
3823
3824
3825
3826
3827
3828
3829
3831 if (IsTopNode) {
3833 Top.bumpNode(SU);
3835 reschedulePhysReg(SU, true);
3836 } else {
3838 Bot.bumpNode(SU);
3840 reschedulePhysReg(SU, false);
3841 }
3842}
3843
3844
3845
3849
3850
3851
3852
3853
3855
3857
3859 if (!MacroFusions.empty())
3861 return DAG;
3862}
3863
3866}
3867
3871
3872
3873
3874
3875
3877 DAG = Dag;
3880
3884
3885
3886
3888 if (!Top.HazardRec) {
3890 }
3891 if (!Bot.HazardRec) {
3893 }
3894}
3895
3898 unsigned NumRegionInstrs) {
3900
3901
3902
3905
3906
3908
3909
3919 }
3920}
3921
3924
3925
3926 for (const SUnit *SU : Bot.Available) {
3929 }
3933 }
3934}
3935
3936
3937
3938
3939
3940
3943
3946 return true;
3947 }
3948
3949
3950 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3951 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3953
3954
3955 const SUnit *CandNextClusterSU =
3957 const SUnit *TryCandNextClusterSU =
3959 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3960 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
3962
3963
3971
3972
3973
3975
3979 }
3980
3981
3984 return true;
3985 }
3986
3987 return false;
3988}
3989
3993 for (SUnit *SU : Q) {
3995 TryCand.SU = SU;
3998 if (tryCandidate(Cand, TryCand)) {
4001 }
4002 }
4003}
4004
4005
4007
4008
4009
4010
4011
4012 if (SUnit *SU = Bot.pickOnlyChoice()) {
4013 IsTopNode = false;
4015 return SU;
4016 }
4017 if (SUnit *SU = Top.pickOnlyChoice()) {
4018 IsTopNode = true;
4020 return SU;
4021 }
4022
4023
4025 setPolicy(BotPolicy, true, Bot, &Top);
4026
4027
4029 setPolicy(TopPolicy, true, Top, &Bot);
4030
4031
4033 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4034 BotCand.Policy != BotPolicy) {
4036 pickNodeFromQueue(Bot, BotCand);
4037 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4038 } else {
4040#ifndef NDEBUG
4044 pickNodeFromQueue(Bot, BotCand);
4045 assert(TCand.SU == BotCand.SU &&
4046 "Last pick result should correspond to re-picking right now");
4047 }
4048#endif
4049 }
4050
4051
4053 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4054 TopCand.Policy != TopPolicy) {
4056 pickNodeFromQueue(Top, TopCand);
4057 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4058 } else {
4060#ifndef NDEBUG
4064 pickNodeFromQueue(Top, TopCand);
4065 assert(TCand.SU == TopCand.SU &&
4066 "Last pick result should correspond to re-picking right now");
4067 }
4068#endif
4069 }
4070
4071
4072 assert(BotCand.isValid());
4073 assert(TopCand.isValid());
4076 if (tryCandidate(Cand, TopCand)) {
4079 }
4080
4081 IsTopNode = Cand.AtTop;
4083 return Cand.SU;
4084}
4085
4086
4088 if (DAG->top() == DAG->bottom()) {
4089 assert(Top.Available.empty() && Top.Pending.empty() &&
4090 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4091 return nullptr;
4092 }
4094 do {
4096 SU = Bot.pickOnlyChoice();
4097 if (SU) {
4099 } else {
4101 BotCand.reset(NoPolicy);
4102
4103
4104 setPolicy(BotCand.Policy, true, Bot, nullptr);
4105 pickNodeFromQueue(Bot, BotCand);
4106 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4108 SU = BotCand.SU;
4109 }
4110 IsTopNode = false;
4112 SU = Top.pickOnlyChoice();
4113 if (SU) {
4115 } else {
4117 TopCand.reset(NoPolicy);
4118
4119
4120 setPolicy(TopCand.Policy, true, Top, nullptr);
4121 pickNodeFromQueue(Top, TopCand);
4122 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4124 SU = TopCand.SU;
4125 }
4126 IsTopNode = true;
4127 } else {
4128 SU = pickNodeBidirectional(IsTopNode);
4129 }
4131
4133 Top.removeReady(SU);
4135 Bot.removeReady(SU);
4136
4139 return SU;
4140}
4141
4142
4143
4145 if (IsTopNode) {
4147 Top.bumpNode(SU);
4148 } else {
4150 Bot.bumpNode(SU);
4151 }
4152}
4153
4156 new ScheduleDAGMI(C, std::make_unique(C),
4157 true);
4159
4161 if (!MacroFusions.empty())
4163 return DAG;
4164}
4165
4166
4167
4168
4169
4170namespace {
4171
4172
4173struct ILPOrder {
4175 const BitVector *ScheduledTrees = nullptr;
4176 bool MaximizeILP;
4177
4178 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4179
4180
4181
4182
4183 bool operator()(const SUnit *A, const SUnit *B) const {
4184 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4185 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4186 if (SchedTreeA != SchedTreeB) {
4187
4188 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4189 return ScheduledTrees->test(SchedTreeB);
4190
4191
4196 }
4197 }
4198 if (MaximizeILP)
4200 else
4202 }
4203};
4204
4205
4208 ILPOrder Cmp;
4209
4210 std::vector<SUnit*> ReadyQ;
4211
4212public:
4213 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4214
4222 }
4223
4224 void registerRoots() override {
4225
4226 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4227 }
4228
4229
4230
4231
4232
4233 SUnit *pickNode(bool &IsTopNode) override {
4234 if (ReadyQ.empty()) return nullptr;
4235 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4236 SUnit *SU = ReadyQ.back();
4237 ReadyQ.pop_back();
4238 IsTopNode = false;
4240 << "SU(" << SU->NodeNum << ") "
4243 << " @"
4246 << '\n'
4247 << "Scheduling " << *SU->getInstr());
4248 return SU;
4249 }
4250
4251
4252 void scheduleTree(unsigned SubtreeID) override {
4253 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4254 }
4255
4256
4257
4258 void schedNode(SUnit *SU, bool IsTopNode) override {
4259 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4260 }
4261
4262 void releaseTopNode(SUnit *) override { }
4263
4264 void releaseBottomNode(SUnit *SU) override {
4265 ReadyQ.push_back(SU);
4266 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4267 }
4268};
4269
4270}
4271
4273 return new ScheduleDAGMILive(C, std::make_unique(true));
4274}
4276 return new ScheduleDAGMILive(C, std::make_unique(false));
4277}
4278
4283
4284
4285
4286
4287
4288#ifndef NDEBUG
4289namespace {
4290
4291
4292
4293template
4294struct SUnitOrder {
4296 if (IsReverse)
4297 return A->NodeNum > B->NodeNum;
4298 else
4299 return A->NodeNum < B->NodeNum;
4300 }
4301};
4302
4303
4305 bool IsAlternating;
4306 bool IsTopDown;
4307
4308
4309
4310
4312 TopQ;
4313
4314
4316 BottomQ;
4317
4318public:
4319 InstructionShuffler(bool alternate, bool topdown)
4320 : IsAlternating(alternate), IsTopDown(topdown) {}
4321
4324 BottomQ.clear();
4325 }
4326
4327
4328
4329
4330 SUnit *pickNode(bool &IsTopNode) override {
4332 if (IsTopDown) {
4333 do {
4334 if (TopQ.empty()) return nullptr;
4335 SU = TopQ.top();
4336 TopQ.pop();
4338 IsTopNode = true;
4339 } else {
4340 do {
4341 if (BottomQ.empty()) return nullptr;
4342 SU = BottomQ.top();
4343 BottomQ.pop();
4345 IsTopNode = false;
4346 }
4347 if (IsAlternating)
4348 IsTopDown = !IsTopDown;
4349 return SU;
4350 }
4351
4352 void schedNode(SUnit *SU, bool IsTopNode) override {}
4353
4354 void releaseTopNode(SUnit *SU) override {
4355 TopQ.push(SU);
4356 }
4357 void releaseBottomNode(SUnit *SU) override {
4358 BottomQ.push(SU);
4359 }
4360};
4361
4362}
4363
4365 bool Alternate =
4369 C, std::make_unique(Alternate, TopDown));
4370}
4371
4373 "shuffle", "Shuffle machine instructions alternating directions",
4375#endif
4376
4377
4378
4379
4380
4381#ifndef NDEBUG
4382namespace llvm {
4383
4386
4387template<>
4390
4392 return std::string(G->MF.getName());
4393 }
4394
4396 return true;
4397 }
4398
4401 return false;
4404 }
4405
4406
4407
4412 return "color=cyan,style=dashed";
4414 return "color=blue,style=dashed";
4415 return "";
4416 }
4417
4419 std::string Str;
4424 SS << "SU:" << SU->NodeNum;
4425 if (DFS)
4427 return Str;
4428 }
4429
4431 return G->getGraphNodeLabel(SU);
4432 }
4433
4435 std::string Str("shape=Mrecord");
4439 if (DFS) {
4440 Str += ",style=filled,fillcolor=\"#";
4442 Str += '"';
4443 }
4444 return Str;
4445 }
4446};
4447
4448}
4449#endif
4450
4451
4452
4454#ifndef NDEBUG
4456#else
4457 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4458 << "systems with Graphviz or gv!\n";
4459#endif
4460}
4461
4462
4464 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4465}
4466
4467
4468
4469
4470
4473 return A.first < B.first;
4474}
4475
4476unsigned ResourceSegments::getFirstAvailableAt(
4477 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4479 IntervalBuilder) const {
4480 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4482 "Cannot execute on an un-sorted set of intervals.");
4483
4484
4485
4486 if (AcquireAtCycle == ReleaseAtCycle)
4487 return CurrCycle;
4488
4489 unsigned RetCycle = CurrCycle;
4491 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4492 for (auto &Interval : _Intervals) {
4494 continue;
4495
4496
4497
4499 "Invalid intervals configuration.");
4500 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4501 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4502 }
4503 return RetCycle;
4504}
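// Example (top-down booking with AcquireAtCycle = 0, ReleaseAtCycle = 2,
// CurrCycle = 1): against booked intervals {[2,4), [4,7)} the candidate
// interval [1,3) overlaps [2,4) and slides to [4,6); that overlaps [4,7)
// and slides to [7,9); nothing overlaps [7,9), so the first available
// cycle is 7.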
4505
4507 const unsigned CutOff) {
4508 assert(A.first <= A.second && "Cannot add negative resource usage");
4509 assert(CutOff > 0 && "0-size interval history has no use.");
4510
4511
4512
4513
4514
4516 return;
4517
4521 }) &&
4522 "A resource is being overwritten");
4523 _Intervals.push_back(A);
4524
4525 sortAndMerge();
4526
4527
4528
4529 while (_Intervals.size() > CutOff)
4530 _Intervals.pop_front();
4531}
4532
4535 assert(A.first <= A.second && "Invalid interval");
4536 assert(B.first <= B.second && "Invalid interval");
4537
4538
4539 if ((A.first == B.first) || (A.second == B.second))
4540 return true;
4541
4542
4543
4544 if ((A.first > B.first) && (A.second < B.second))
4545 return true;
4546
4547
4548
4549 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4550 return true;
4551
4552
4553
4554 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4555 return true;
4556
4557 return false;
4558}
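// The four cases above cover: a shared endpoint, A strictly inside B, A
// hanging off B's right edge, and A hanging off B's left edge. With these
// half-open [start, end) intervals, merely touching (A.second == B.first)
// does not count as intersecting.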
4559
4560void ResourceSegments::sortAndMerge() {
4561 if (_Intervals.size() <= 1)
4562 return;
4563
4564
4566
4567
4568 auto next = std::next(std::begin(_Intervals));
4569 auto E = std::end(_Intervals);
4570 for (; next != E; ++next) {
4571 if (std::prev(next)->second >= next->first) {
4572 next->first = std::prev(next)->first;
4573 _Intervals.erase(std::prev(next));
4574 continue;
4575 }
4576 }
4577}
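// Keeping _Intervals sorted and merged is what allows getFirstAvailableAt()
// to resolve a booking with a single forward scan, while the CutOff trim in
// add() bounds how much history that scan must visit.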
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Class recording the (high level) value of a variable.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
reverse_iterator rend() const
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
reverse_iterator rbegin() const
bool test(unsigned Idx) const
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void clear()
clear - Removes all bits from the bitvector.
This class represents an Operation in the Expression.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
List of PressureChanges in order of increasing, unique PSetID.
void dump(const TargetRegisterInfo &TRI) const
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
void clear()
clear - Erase all elements from the queue.
Helpers for implementing custom MachineSchedStrategy classes.
ArrayRef< SUnit * > elements()
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
StringRef getName() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
@ Weak
Arbitrary weak DAG edge.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
unsigned getReg() const
Returns the register associated with this edge.
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
bool isArtificialDep() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
unsigned short Latency
Node latency.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
bool hasPhysRegDefs
Has physreg defs that are being used.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
SmallVector< SDep, 4 > Succs
All sunit successors.
bool hasReservedResource
Uses a reserved resource.
bool isBottomReady() const
bool hasPhysRegUses
Has physreg uses.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
void releasePending()
Release pending ready nodes in to the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
void dumpScheduledState() const
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
Compute the values of each DAG node for various metrics during DFS.
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
void clear()
Clear the results.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
MachineInstr * FirstDbgValue
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
bool ShouldTrackLaneMasks
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
PressureDiffs SUPressureDiffs
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void collectVRegUses(SUnit &SU)
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
~ScheduleDAGMILive() override
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
const SUnit * NextClusterSucc
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
const SUnit * getNextClusterPred() const
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
std::reverse_iterator< const_iterator > const_reverse_iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
iterator_base< SparseMultiSet * > iterator
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
bool enableIntervals() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
int getNumOccurrences() const
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
unsigned getWeakLeft(const SUnit *SU, bool isTop)
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
void initializePostMachineSchedulerPass(PassRegistry &)
cl::opt< bool > VerifyScheduling
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
cl::opt< bool > ViewMISchedDAGs
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
void sort(IteratorTy Start, IteratorTy End)
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void initializeMachineSchedulerPass(PassRegistry &)
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
cl::opt< MISched::Direction > PreRADirection
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResoufceCycle."))
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
DOTGraphTraits(bool isSimple=false)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static bool renderGraphFromBottomUp()
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
static constexpr LaneBitmask getNone()
const unsigned * SubUnitsIdxBegin
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
virtual ~MachineSchedContext()
bool DisableLatencyHeuristic
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg.
PressureChange CriticalMax
PressureChange CurrentMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Summarize the unscheduled region.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
bool IsAcyclicLatencyLimited
An individual mapping from virtual register number to SUnit.