LLVM: lib/Target/AMDGPU/GCNSchedStrategy.cpp Source File
//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
//
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;
36 "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
37 cl::desc("Disable unclustered high register pressure "
38 "reduction scheduling stage."),
40
42 "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
43 cl::desc("Disable clustered low occupancy "
44 "rescheduling for ILP scheduling stage."),
46
48 "amdgpu-schedule-metric-bias", cl::Hidden,
50 "Sets the bias which adds weight to occupancy vs latency. Set it to "
51 "100 to chase the occupancy only."),
53
56 cl::desc("Relax occupancy targets for kernels which are memory "
57 "bound (amdgpu-membound-threshold), or "
58 "Wave Limited (amdgpu-limit-wave-threshold)."),
60
62 "amdgpu-use-amdgpu-trackers", cl::Hidden,
63 cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
65
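
// Illustrative usage note: these are hidden backend options, so a hypothetical
// experiment could exercise them through llc, for example:
//   llc -mtriple=amdgcn-amd-amdhsa -amdgpu-schedule-metric-bias=50 kernel.ll
// Per the description above, a bias of 100 makes the schedule metric chase
// occupancy only.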

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
}

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler. Allow for lower occupancy
  // targets if the kernel is wave limited or memory bound, and the relaxed
  // occupancy feature is in use.
  TargetOccupancy =
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result, except
    // it returns a reasonably small number for targets with a lot of VGPRs,
    // such as GFX10 and GFX11.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");

    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  }

  // Subtract error margin and bias from register limits and avoid overflow.
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}

/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
///
/// This works for the common case, but it has a few exceptions that have been
/// observed through trial and error:
///   - Explicit physical register operands
///   - Subregister definitions
///
/// In both of those cases, PressureDiff doesn't represent the actual pressure,
/// and querying LiveIntervals through the RegPressureTracker is needed to get
/// an accurate value.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  // Cannot use pressure diffs for subregister defs or with physregs, it's
  // imprecise in both cases.
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }
  return true;
}

static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
  if (!GCNTrackers) {
    AtTop
        ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
        : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);

    return;
  }

  // GCNTrackers
  Pressure.resize(4, 0);
  MachineInstr *MI = SU->getInstr();
  GCNRegPressure NewPressure;
  if (AtTop) {
    GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
    NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);
  } else {
    GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
    TempUpwardTracker.recede(*MI);
    NewPressure = TempUpwardTracker.getPressure();
  }
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}

// The IGLP scheduling instructions are mutually exclusive with all other DAG
// mutations, so regions containing them are tracked and handled specially.
static bool isIGLPMutationOnly(unsigned Opcode) {
  return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
}

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  Pressure.clear();
  MaxPressure.clear();

  // We try to use the cached PressureDiffs in the ScheduleDAG whenever
  // possible over querying the RegPressureTracker.
  //
  // RegPressureTracker will make a lot of LIS queries which are very
  // expensive, it is considered a slow function in this context.
  //
  // PressureDiffs are slightly imprecise but can be queried in constant time.
  //
  // The GCN trackers need the RegPressureTracker's level of accuracy, so they
  // bypass the PressureDiffs entirely.
  if (GCNTrackers || !canUsePressureDiffs(*SU)) {
    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
  } else {
    // Reserve 4 slots.
    Pressure.resize(4, 0);
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up so if we're working top-down, negate
      // the increment.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // has the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
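
// Worked example (hypothetical numbers): with VGPRExcessLimit = 24, a
// candidate whose new VGPR pressure would be 30 gets RPDelta.Excess set on the
// VGPR_32 pressure set with a unit increment of 30 - 24 = 6. tryCandidate()
// then penalizes it against candidates that stay at or below the excess limit.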

void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query
      // it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional().
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }

  // Set the bottom-up policy based on the state of the current bottom zone
  // and the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
                      /*IsBottomUp=*/true);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
                        /*IsBottomUp=*/true);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
                      /*IsBottomUp=*/false);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
                        /*IsBottomUp=*/false);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from GenericScheduler::pickNode().
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}
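
// Note: the pick loop retries while SU->isScheduled because, in bidirectional
// scheduling, a unit can remain in one boundary's ready queue after it was
// already scheduled from the other boundary; the removeReady() calls keep
// both zones' queues consistent with the final pick.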

void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (GCNTrackers) {
    MachineInstr *MI = SU->getInstr();
    IsTopNode ? (void)DownwardTracker.advance(MI, false)
              : UpwardTracker.recede(*MI);
  }

  return GenericScheduler::schedNode(SU, IsTopNode);
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage && CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C, bool IsLegacyScheduler)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
  GCNTrackers = GCNTrackers & !IsLegacyScheduler;
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}

bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}

GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::MemoryClauseInitialSchedule);
}

/// GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as
/// much as possible. This is achieved by:
///  1. Prioritizing clustered operations before the stall latency heuristic.
///  2. Prioritizing long-latency loads before the stall latency heuristic.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending, or nullptr
///             if Cand is from a different zone than TryCand.
/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand).
bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  if (DAG->isTrackingPressure()) {
    // Avoid exceeding the target's limit.
    if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                    RegExcess, TRI, DAG->MF))
      return TryCand.Reason != NoCand;

    // Avoid increasing the max critical pressure in the scheduled region.
    if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                    TryCand, Cand, RegCritical, TRI, DAG->MF))
      return TryCand.Reason != NoCand;
  }

  // MaxMemoryClause-specific: we prioritize clustered instructions as we would
  // get more benefit from clausing these memory instructions.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // MaxMemoryClause-specific: prioritize long latency memory load
    // instructions in top-bottom order to hide more latency. The mayLoad check
    // excludes store-like instructions, which we do not want scheduled early.
    bool TryMayLoad =
        TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
    bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();

    if (TryMayLoad || CandMayLoad) {
      bool TryLongLatency =
          TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
      bool CandLongLatency =
          10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;

      if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                     Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                     Cand, Stall))
        return TryCand.Reason != NoCand;
    }
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;
  }

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Avoid serializing long latency dependence chains. For acyclic path
    // limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    if (Zone->isTop() == (TryCand.SU->NodeNum < Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }

  return false;
}
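
// Worked example (hypothetical latencies) for the long-latency heuristic
// above: in a top zone, a load candidate with Latency 120 competing against a
// candidate with Latency 8 satisfies 120 > 10 * 8, so TryLongLatency wins the
// Stall comparison and the load is placed early enough to hide more latency.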

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
                    &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                                        MachineBasicBlock::iterator RegionEnd) {
  auto REnd = RegionEnd == RegionBegin->getParent()->end()
                  ? std::prev(RegionEnd)
                  : RegionEnd;
  return &*skipDebugInstructionsBackward(REnd, RegionBegin);
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse calculated live set if the
  // successor will be sent to scheduling past current block.

  // However, due to the bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Workaround that by sticking to one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    RegionFirstMIs.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));

  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}

void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    MachineInstr *RegionKey =
        IsLiveOut
            ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
            : &*DAG->Regions[I].first;
    IdxToInstruction[I] = RegionKey;
  }
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. Clustered low occupancy reschedule will aim to improve
  // ILP in regions where occupancy was dropped.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const Function &F = MF.getFunction();
  if (ST.computeOccupancy(F, MFI.getLDSSize()) <= DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
  // the defs and the region we sinked the def to. Cached pressure for regions
  // where a def is sinked from will also be invalidated. This will need to be
  // fixed if there is another pass after this pass.
  assert(!S.hasNextStage());

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
      DAG.RegionsWithMinOcc[IDX] =
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (isIGLPMutationOnly(I.getOpcode()))
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure:  "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
                          StageID == GCNSchedStageID::ILPInitialSchedule;
    DAG.addMutation(createIGroupLPDAGMutation(
        IsInitialStage ? AMDGPU::SchedulingPhase::Initial
                       : AMDGPU::SchedulingPhase::PreRAReentry));
  }

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions that have excess register pressure (i.e. spilling)
  // or had minimum occupancy at the beginning of the stage (as long as
  // rescheduling of previous regions did not make occupancy drop back down to
  // the initial minimum).
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the
  // last stage, or if we found it was testing critical register pressure
  // limits in the unclustered reschedule stage. The later is because we may
  // not have been able to raise the min occupancy in the previous stage so
  // the region may be overly constrained even if it was rescheduled.
  if (!DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule ||
      StageID == GCNSchedStageID::MemoryClauseInitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  RegionIdx++;
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(),
      ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(), MF.getFunction()));
  unsigned WavesAfter =
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  // Mark regions that exceed the addressable register budget so later stages
  // can try to reduce their pressure.
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
    DAG.RegionsWithExcessRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}

unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}
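
// Worked example (hypothetical values): if SU has one register-dependence
// predecessor whose def became ready at cycle 4 with a computed latency of 6,
// then ReadyCycle = max(CurrCycle, 4 + 6). With CurrCycle = 7 the unit is
// ready at cycle 10, so the caller records a 10 - 7 = 3 cycle bubble.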

#ifndef NDEBUG
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction          "
            "                             \n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ]  :  " << *I.first << "\n";
    IPrev = I.second;
  }
}
#endif

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}
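
// Note on the metric: it is essentially the ratio of stall (bubble) cycles to
// total schedule length, scaled by ScheduleMetrics::ScaleFactor, so fewer
// bubbles over the same length yield a smaller (better) value. The
// UnclusteredHighRPStage weighs this latency metric against occupancy using
// ScheduleMetricBias; per the flag description, a bias of 100 chases
// occupancy only.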

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
       mayCauseSpilling(WavesAfter)) ||
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  // Do not attempt to relax the schedule even more if we are already spilling.
  if (isRegionWithExcessRP())
    return false;

  LLVM_DEBUG(
      dbgs()
      << "\n\t      *** In shouldRevertScheduling ***\n"
      << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs()
      << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(
    unsigned WavesAfter) {
  return mayCauseSpilling(WavesAfter);
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      S.hasNextStage() &&
      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() instruction is a debug instruction, this will actually
  // shrink the region since we moved all debug instructions to the end of the
  // block. Find the first instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization.
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that
    // the register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with rematerializable reg as live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}

bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables we will be modifying and replacing
  // if sinking succeeds.
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
      NewRegions;
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
  for (const auto &It : RematDefToLiveInRegions)
    ImpactedRegions.insert(It.second.begin(), It.second.end());

  // Make copies of register pressure and live-ins cache that will be updated
  // as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
    int SGPRUsage = NewPressure[I].getSGPRNum();

    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Check if the cause of the occupancy drop is VGPR usage and not SGPR.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
    }

    // First check if we have enough trivially rematerializable instructions
    // to improve occupancy. Optimistically assume all instructions we are
    // able to sink decreased RP.
    int TotalSinkableRegs = 0;
    for (const auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If in the most optimistic scenario we cannot improve occupancy, then do
    // not attempt to sink any instructions.
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
    SmallVector<MachineInstr *, 4> SinkedDefs;
    for (auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize MI to its use block. Since we are only rematerializing
      // instructions that do not have any virtual reg uses, we do not need to
      // call LiveRangeEdit::allUsesAvailableAt() and
      // LiveRangeEdit::canRematerializeAt().
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*std::prev(InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
      InsertedMIToOldDef[NewMI] = Def;

      // Update region boundaries in the scheduling region we sinked from,
      // since we may sink an instruction that was at the beginning or end of
      // its region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI=*/nullptr,
                                 /*Removing=*/true);

      // Update region boundaries in the region we sinked to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sinked from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // Def is live-through and not used in this block.
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // Def is used and rematerialized into this block.
          GCNDownwardRPTracker RPT(*LIS);
          auto *NonDbgMI = &*skipDebugInstructionsForward(
              NewRegions[Idx].first, NewRegions[Idx].second);
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
          RPT.advance(NewRegions[Idx].second);
          NewPressure[Idx] = RPT.moveMaxPressure();
        }
      }

      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }

    // Remove defs we just sinked from all regions' lists of sinkable defs.
    for (auto &Def : SinkedDefs)
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
        RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
    Improved = true;
  }

  if (!Improved) {
    // Occupancy was not improved for all regions that were at MinOccupancy.
    // Undo sinking and remove newly rematerialized instructions.
    for (auto &Entry : InsertedMIToOldDef) {
      MachineInstr *MI = Entry.first;
      MachineInstr *OldMI = Entry.second;
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      OldMI->clearRegisterDeads(Reg);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
    }
    return false;
  }

  // Occupancy was improved for all regions.
  for (auto &Entry : InsertedMIToOldDef) {
    MachineInstr *MI = Entry.first;
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
    OldMI->eraseFromParent();
    LIS->removeInterval(Reg);
    LIS->createAndComputeVirtRegInterval(Reg);
  }

  // Update live-ins, register pressure, and regions caches.
  for (auto Idx : ImpactedRegions) {
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();

  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}

// Copied from MachineLICM.
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses())
    if (MO.getReg().isVirtual())
      return false;

  return true;
}

// When removing a region, both its beginning and ending boundaries may need
// updating. When inserting NewMI, only a region's beginning can be affected,
// since we only ever insert in front of an already existing instruction.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = RegionBoundaries.size();
  // Search for the first region of the block where MI is located.
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
    ++I;

  for (; I != E; ++I) {
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
      return;

    if (Removing && MI == RegionBoundaries[I].first &&
        MI == RegionBoundaries[I].second) {
      // MI is in a region of size 1. After removing, the region will be
      // size 0, so set RegionBegin and RegionEnd to past-the-end-of-block
      // iterators.
      RegionBoundaries[I] =
          std::pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == RegionBoundaries[I].first) {
      if (Removing)
        RegionBoundaries[I] =
            std::pair(std::next(MI), RegionBoundaries[I].second);
      else
        // Inserted NewMI in front of region, set new RegionBegin to NewMI.
        RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
                                        RegionBoundaries[I].second);
      return;
    }
    if (Removing && MI == RegionBoundaries[I].second) {
      RegionBoundaries[I] =
          std::pair(RegionBoundaries[I].first, std::prev(MI));
      return;
    }
  }
}
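
// Example walk-through of the cases above: removing the only instruction of a
// single-MI region collapses the region to the empty pair (end(), end());
// removing a region's first MI advances RegionBegin to std::next(MI);
// inserting NewMI in front of a region rebases RegionBegin onto NewMI. Regions
// in other blocks are never touched.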

static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
    return isIGLPMutationOnly(MI->getOpcode());
  });
}

GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
    bool RemoveKillFlags)
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}

void GCNPostScheduleDAGMILive::schedule() {
  HasIGLPInstrs = hasIGLPInstrs(this);
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }

  ScheduleDAGMI::schedule();
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
  if (HasIGLPInstrs)
    SavedMutations.swap(Mutations);

  ScheduleDAGMI::finalizeSchedule();
}