LLVM: lib/CodeGen/MachinePipeliner.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
73#include "llvm/Config/llvm-config.h"
84#include
85#include
86#include
87#include
88#include
89#include
90#include
91#include
92#include
93#include
94#include
95#include
96#include
97#include
98
99using namespace llvm;
100
101#define DEBUG_TYPE "pipeliner"
102
103STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
104STATISTIC(NumPipelined, "Number of loops software pipelined");
105STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
106STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
107STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
108STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
109STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
110STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
111STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
112STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
113STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
114
115
117 cl::desc("Enable Software Pipelining"));
118
119
123
124
126 cl::desc("Size limit for the MII."),
128
129
130
132 cl::desc("Force pipeliner to use specified II."),
134
135
138 cl::desc("Maximum stages allowed in the generated scheduled."),
140
141
142
145 cl::desc("Prune dependences between unrelated Phi nodes."),
147
148
149
152 cl::desc("Prune loop carried order dependences."),
154
155#ifndef NDEBUG
157#endif
158
162
167
170 cl::desc("Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
173
177 "Use the experimental peeling code generator for software pipelining"));
178
180 cl::desc("Range to search for II"),
182
185 cl::desc("Limit register pressure of scheduled loop"));
186
190 cl::desc("Margin representing the unused percentage of "
191 "the register pressure limit"));
192
195 cl::desc("Use the MVE code generator for software pipelining"));
196
197namespace llvm {
198
199
202 cl::desc("Enable CopyToPhi DAG Mutation"));
203
204
205
207 "pipeliner-force-issue-width",
208 cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
210
211
214 cl::desc("Set how to use window scheduling algorithm."),
216 "Turn off window algorithm."),
218 "Use window algorithm after SMS algorithm fails."),
220 "Use window algorithm instead of SMS algorithm.")));
221
222}
223
224unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
226#ifndef NDEBUG
228#endif
230
232 "Modulo Software Pipelining", false, false)
239
240
242 if (skipFunction(mf.getFunction()))
243 return false;
244
246 return false;
247
248 if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
250 return false;
251
252 if (!mf.getSubtarget().enableMachinePipeliner())
253 return false;
254
255
256
257 if (mf.getSubtarget().useDFAforSMS() &&
258 (!mf.getSubtarget().getInstrItineraryData() ||
259 mf.getSubtarget().getInstrItineraryData()->isEmpty()))
260 return false;
261
262 MF = &mf;
263 MLI = &getAnalysis().getLI();
264 MDT = &getAnalysis().getDomTree();
265 ORE = &getAnalysis().getORE();
266 TII = MF->getSubtarget().getInstrInfo();
267 RegClassInfo.runOnMachineFunction(*MF);
268
269 for (const auto &L : *MLI)
270 scheduleLoop(*L);
271
272 return false;
273}
274
275
276
277
278
279bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
280 bool Changed = false;
281 for (const auto &InnerLoop : L)
282 Changed |= scheduleLoop(*InnerLoop);
283
284#ifndef NDEBUG
285
287 if (Limit >= 0) {
289 return Changed;
291 }
292#endif
293
294 setPragmaPipelineOptions(L);
295 if (!canPipelineLoop(L)) {
296 LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
299 L.getStartLoc(), L.getHeader())
300 << "Failed to pipeline loop";
301 });
302
304 return Changed;
305 }
306
307 ++NumTrytoPipeline;
308 if (useSwingModuloScheduler())
309 Changed = swingModuloScheduler(L);
310
311 if (useWindowScheduler(Changed))
312 Changed = runWindowScheduler(L);
313
315 return Changed;
316}
317
318void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
319
322
324
325 if (LBLK == nullptr)
326 return;
327
329 if (BBLK == nullptr)
330 return;
331
333 if (TI == nullptr)
334 return;
335
337 if (LoopID == nullptr)
338 return;
339
342
344 MDNode *MD = dyn_cast(MDO);
345
346 if (MD == nullptr)
347 continue;
348
350
351 if (S == nullptr)
352 continue;
353
354 if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
356 "Pipeline initiation interval hint metadata should have two operands.");
358 mdconst::extract(MD->getOperand(1))->getZExtValue();
359 assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
360 } else if (S->getString() == "llvm.loop.pipeline.disable") {
362 }
363 }
364}
365
366
367
368
369bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
370 if (L.getNumBlocks() != 1) {
373 L.getStartLoc(), L.getHeader())
374 << "Not a single basic block: "
375 << ore::NV("NumBlocks", L.getNumBlocks());
376 });
377 return false;
378 }
379
383 L.getStartLoc(), L.getHeader())
384 << "Disabled by Pragma.";
385 });
386 return false;
387 }
388
389
390
395 LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
396 NumFailBranch++;
399 L.getStartLoc(), L.getHeader())
400 << "The branch can't be understood";
401 });
402 return false;
403 }
404
409 LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
410 NumFailLoop++;
413 L.getStartLoc(), L.getHeader())
414 << "The loop structure is not supported";
415 });
416 return false;
417 }
418
419 if (.getLoopPreheader()) {
420 LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
421 NumFailPreheader++;
424 L.getStartLoc(), L.getHeader())
425 << "No loop preheader found";
426 });
427 return false;
428 }
429
430
431 preprocessPhiNodes(*L.getHeader());
432 return true;
433}
434
438 *getAnalysis().getLIS().getSlotIndexes();
439
443 auto *RC = MRI.getRegClass(DefOp.getReg());
444
445 for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
448 continue;
449
450
451
452 Register NewReg = MRI.createVirtualRegister(RC);
460 RegOp.setReg(NewReg);
462 }
463 }
464}
465
466
467
468
469
470bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
471 assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
472
474 *this, L, getAnalysis().getLIS(), RegClassInfo,
476
478
479
480 SMS.startBlock(MBB);
481
482
483
488 ;
489
491 SMS.schedule();
492 SMS.exitRegion();
493
494 SMS.finishBlock();
495 return SMS.hasNewSchedule();
496}
497
507}
508
509bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
514 Context.PassConfig = &getAnalysis();
515 Context.AA = &getAnalysis().getAAResults();
516 Context.LIS = &getAnalysis().getLIS();
519 return WS.run();
520}
521
522bool MachinePipeliner::useSwingModuloScheduler() {
523
525}
526
527bool MachinePipeliner::useWindowScheduler(bool Changed) {
528
529
530
531
533 LLVM_DEBUG(dbgs() << "Window scheduling is disabled when "
534 "llvm.loop.pipeline.initiationinterval is set.\n");
535 return false;
536 }
537
540}
541
542void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
545 else if (II_setByPragma > 0)
546 MII = II_setByPragma;
547 else
548 MII = std::max(ResMII, RecMII);
549}
550
551void SwingSchedulerDAG::setMAX_II() {
554 else if (II_setByPragma > 0)
555 MAX_II = II_setByPragma;
556 else
558}
559
560
561
565 addLoopCarriedDependences(AA);
566 updatePhiDependences();
568 changeDependences();
569 postProcessDAG();
572
574 findCircuits(NodeSets);
576
577
578 unsigned ResMII = calculateResMII();
579 unsigned RecMII = calculateRecMII(NodeSets);
580
581 fuseRecs(NodeSets);
582
583
585 RecMII = 0;
586
587 setMII(ResMII, RecMII);
588 setMAX_II();
589
590 LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
591 << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
592
593
594 if (MII == 0) {
595 LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
596 NumFailZeroMII++;
597 Pass.ORE->emit([&]() {
600 << "Invalid Minimal Initiation Interval: 0";
601 });
602 return;
603 }
604
605
608 << ", we don't pipeline large loops\n");
609 NumFailLargeMaxMII++;
610 Pass.ORE->emit([&]() {
613 << "Minimal Initiation Interval too large: "
614 << ore::NV("MII", (int)MII) << " > "
616 << "Refer to -pipeliner-max-mii.";
617 });
618 return;
619 }
620
621 computeNodeFunctions(NodeSets);
622
623 registerPressureFilter(NodeSets);
624
625 colocateNodeSets(NodeSets);
626
627 checkNodeSets(NodeSets);
628
630 for (auto &I : NodeSets) {
631 dbgs() << " Rec NodeSet ";
632 I.dump();
633 }
634 });
635
637
638 groupRemainingNodes(NodeSets);
639
640 removeDuplicateNodes(NodeSets);
641
643 for (auto &I : NodeSets) {
644 dbgs() << " NodeSet ";
645 I.dump();
646 }
647 });
648
649 computeNodeOrder(NodeSets);
650
651
652 checkValidNodeOrder(Circuits);
653
655 Scheduled = schedulePipeline(Schedule);
656
657 if (!Scheduled){
659 NumFailNoSchedule++;
660 Pass.ORE->emit([&]() {
663 << "Unable to find schedule";
664 });
665 return;
666 }
667
669
670 if (numStages == 0) {
671 LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
672 NumFailZeroStage++;
673 Pass.ORE->emit([&]() {
676 << "No need to pipeline - no overlapped iterations in schedule.";
677 });
678 return;
679 }
680
683 << " : too many stages, abort\n");
684 NumFailLargeMaxStage++;
685 Pass.ORE->emit([&]() {
688 << "Too many stages in schedule: "
689 << ore::NV("numStages", (int)numStages) << " > "
691 << ". Refer to -pipeliner-max-stages.";
692 });
693 return;
694 }
695
696 Pass.ORE->emit([&]() {
699 << "Pipelined succesfully!";
700 });
701
702
704 std::vector<MachineInstr *> OrderedInsts;
708 OrderedInsts.push_back(SU->getInstr());
709 Cycles[SU->getInstr()] = Cycle;
710 Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
711 }
712 }
714 for (auto &KV : NewMIs) {
715 Cycles[KV.first] = Cycles[KV.second];
716 Stages[KV.first] = Stages[KV.second];
717 NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
718 }
719
721 std::move(Stages));
724 "Cannot serialize a schedule with InstrChanges!");
727 return;
728 }
729
738 } else {
742 }
743 ++NumPipelined;
744}
745
746
748 for (auto &KV : NewMIs)
750 NewMIs.clear();
751
752
754}
755
756
757
759 unsigned &InitVal, unsigned &LoopVal) {
760 assert(Phi.isPHI() && "Expecting a Phi.");
761
762 InitVal = 0;
763 LoopVal = 0;
764 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
765 if (Phi.getOperand(i + 1).getMBB() != Loop)
766 InitVal = Phi.getOperand(i).getReg();
767 else
768 LoopVal = Phi.getOperand(i).getReg();
769
770 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
771}
772
773
776 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
777 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
778 return Phi.getOperand(i).getReg();
779 return 0;
780}
781
782
787 while (!Worklist.empty()) {
789 for (const auto &SI : SU->Succs) {
790 SUnit *SuccSU = SI.getSUnit();
792 if (Visited.count(SuccSU))
793 continue;
794 if (SuccSU == SUb)
795 return true;
797 Visited.insert(SuccSU);
798 }
799 }
800 }
801 return false;
802}
803
804
805
807 return MI.isCall() || MI.mayRaiseFPException() ||
808 MI.hasUnmodeledSideEffects() ||
809 (MI.hasOrderedMemoryRef() &&
810 (.mayLoad() ||
.isDereferenceableInvariantLoad()));
811}
812
813
814
815
818 if (->hasOneMemOperand())
819 return;
822 return;
824 for (const Value *V : Objs) {
827 return;
828 }
829 }
830}
831
832
833
834
835
836void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
840 for (auto &SU : SUnits) {
843 PendingLoads.clear();
844 else if (MI.mayLoad()) {
847 if (Objs.empty())
849 for (const auto *V : Objs) {
852 }
853 } else if (MI.mayStore()) {
856 if (Objs.empty())
858 for (const auto *V : Objs) {
860 PendingLoads.find(V);
861 if (I == PendingLoads.end())
862 continue;
863 for (auto *Load : I->second) {
865 continue;
867
868
869
871 int64_t Offset1, Offset2;
872 bool Offset1IsScalable, Offset2IsScalable;
874 Offset1IsScalable, TRI) &&
876 Offset2IsScalable, TRI)) {
878 Offset1IsScalable == Offset2IsScalable &&
879 (int)Offset1 < (int)Offset2) {
881 "What happened to the chain edge?");
883 Dep.setLatency(1);
884 SU.addPred(Dep);
885 continue;
886 }
887 }
888
889
890
891 if (!AA) {
893 Dep.setLatency(1);
894 SU.addPred(Dep);
895 continue;
896 }
901 Dep.setLatency(1);
902 SU.addPred(Dep);
903 continue;
904 }
908 Dep.setLatency(1);
909 SU.addPred(Dep);
910 continue;
911 }
917 Dep.setLatency(1);
918 SU.addPred(Dep);
919 }
920 }
921 }
922 }
923 }
924}
925
926
927
928
929
930
931
932void SwingSchedulerDAG::updatePhiDependences() {
935
936
938 RemoveDeps.clear();
939
940 unsigned HasPhiUse = 0;
941 unsigned HasPhiDef = 0;
943
945 if (!MO.isReg())
946 continue;
948 if (MO.isDef()) {
949
953 UI != UE; ++UI) {
956 if (SU != nullptr && UseMI->isPHI()) {
957 if (->isPHI()) {
959 Dep.setLatency(1);
960 I.addPred(Dep);
961 } else {
962 HasPhiDef = Reg;
963
964
965 if (SU->NodeNum < I.NodeNum && .isPred(SU))
967 }
968 }
969 }
970 } else if (MO.isUse()) {
971
973 if (DefMI == nullptr)
974 continue;
976 if (SU != nullptr && DefMI->isPHI()) {
977 if (->isPHI()) {
979 Dep.setLatency(0);
980 ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,
982 I.addPred(Dep);
983 } else {
984 HasPhiUse = Reg;
985
986
987 if (SU->NodeNum < I.NodeNum && .isPred(SU))
989 }
990 }
991 }
992 }
993
995 continue;
996 for (auto &PI : I.Preds) {
997 MachineInstr *PMI = PI.getSUnit()->getInstr();
999 if (I.getInstr()->isPHI()) {
1001 continue;
1003 continue;
1004 }
1006 }
1007 }
1008 for (const SDep &D : RemoveDeps)
1010 }
1011}
1012
1013
1014
1015void SwingSchedulerDAG::changeDependences() {
1016
1017
1018
1020 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1021 int64_t NewOffset = 0;
1022 if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1023 NewOffset))
1024 continue;
1025
1026
1027 Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1030 continue;
1032 if (!DefSU)
1033 continue;
1034
1036 if (!LastMI)
1037 continue;
1039 if (!LastSU)
1040 continue;
1041
1043 continue;
1044
1045
1047 for (const SDep &P : I.Preds)
1048 if (P.getSUnit() == DefSU)
1050 for (const SDep &D : Deps) {
1053 }
1054
1055 Deps.clear();
1056 for (auto &P : LastSU->Preds)
1057 if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1058 Deps.push_back(P);
1059 for (const SDep &D : Deps) {
1062 }
1063
1064
1065
1069
1070
1071
1072 InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1073 }
1074}
1075
1076
1077
1078
1079
1080
1083 std::vector<MachineInstr *> &OrderedInsts,
1086
1087
1090 for (int Stage = 0, LastStage = Schedule.getMaxStageCount();
1091 Stage <= LastStage; ++Stage) {
1094 Instrs[Cycle].push_front(SU);
1095 }
1096 }
1097 }
1098
1101 std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];
1103 for (SUnit *SU : CycleInstrs) {
1105 OrderedInsts.push_back(MI);
1107 }
1108 }
1109}
1110
1111namespace {
1112
1113
1114
1115struct FuncUnitSorter {
1119
1121 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1122
1123
1124
1125
1126 unsigned minFuncUnits(const MachineInstr *Inst,
1129 unsigned min = UINT_MAX;
1130 if (InstrItins && !InstrItins->isEmpty()) {
1133 InstrItins->endStage(SchedClass))) {
1135 unsigned numAlternatives = llvm::popcount(funcUnits);
1136 if (numAlternatives < min) {
1137 min = numAlternatives;
1138 F = funcUnits;
1139 }
1140 }
1141 return min;
1142 }
1147
1148
1149 return min;
1150
1154 if (!PRE.ReleaseAtCycle)
1155 continue;
1158 unsigned NumUnits = ProcResource->NumUnits;
1159 if (NumUnits < min) {
1160 min = NumUnits;
1161 F = PRE.ProcResourceIdx;
1162 }
1163 }
1164 return min;
1165 }
1166 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1167 }
1168
1169
1170
1171
1172
1173
1175 unsigned SchedClass = MI.getDesc().getSchedClass();
1176 if (InstrItins && !InstrItins->isEmpty()) {
1179 InstrItins->endStage(SchedClass))) {
1182 Resources[FuncUnits]++;
1183 }
1184 return;
1185 }
1190
1191
1192 return;
1193
1197 if (!PRE.ReleaseAtCycle)
1198 continue;
1199 Resources[PRE.ProcResourceIdx]++;
1200 }
1201 return;
1202 }
1203 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1204 }
1205
1206
1209 unsigned MFUs1 = minFuncUnits(IS1, F1);
1210 unsigned MFUs2 = minFuncUnits(IS2, F2);
1211 if (MFUs1 == MFUs2)
1212 return Resources.lookup(F1) < Resources.lookup(F2);
1213 return MFUs1 > MFUs2;
1214 }
1215};
1216
1217
1218class HighRegisterPressureDetector {
1222
1223 const unsigned PSetNum;
1224
1225
1226
1227
1228
1229 std::vector InitSetPressure;
1230
1231
1232
1233 std::vector PressureSetLimit;
1234
1236
1238
1239public:
1240 using OrderedInstsTy = std::vector<MachineInstr *>;
1242
1243private:
1244 static void dumpRegisterPressures(const std::vector &Pressures) {
1245 if (Pressures.size() == 0) {
1246 dbgs() << "[]";
1247 } else {
1249 for (unsigned P : Pressures) {
1252 }
1253 dbgs() << ']';
1254 }
1255 }
1256
1259 for (auto PSetIter = MRI.getPressureSets(Reg); PSetIter.isValid();
1260 ++PSetIter) {
1261 dbgs() << *PSetIter << ' ';
1262 }
1263 dbgs() << '\n';
1264 }
1265
1266 void increaseRegisterPressure(std::vector &Pressure,
1268 auto PSetIter = MRI.getPressureSets(Reg);
1269 unsigned Weight = PSetIter.getWeight();
1270 for (; PSetIter.isValid(); ++PSetIter)
1271 Pressure[*PSetIter] += Weight;
1272 }
1273
1274 void decreaseRegisterPressure(std::vector &Pressure,
1276 auto PSetIter = MRI.getPressureSets(Reg);
1277 unsigned Weight = PSetIter.getWeight();
1278 for (; PSetIter.isValid(); ++PSetIter) {
1279 auto &P = Pressure[*PSetIter];
1281 "register pressure must be greater than or equal weight");
1282 P -= Weight;
1283 }
1284 }
1285
1286
1287 bool isReservedRegister(Register Reg) const {
1288 return Reg.isPhysical() && MRI.isReserved(Reg.asMCReg());
1289 }
1290
1291 bool isDefinedInThisLoop(Register Reg) const {
1292 return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;
1293 }
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303 void computeLiveIn() {
1305 for (auto &MI : *OrigMBB) {
1306 if (MI.isDebugInstr())
1307 continue;
1308 for (auto &Use : ROMap[&MI].Uses) {
1310
1311
1313 continue;
1314 if (isReservedRegister(Reg))
1315 continue;
1316 if (isDefinedInThisLoop(Reg))
1317 continue;
1319 }
1320 }
1321
1322 for (auto LiveIn : Used)
1323 increaseRegisterPressure(InitSetPressure, LiveIn);
1324 }
1325
1326
1328 for (unsigned PSet = 0; PSet < PSetNum; PSet++)
1330 }
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343 Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,
1344 Instr2StageTy &Stages) const {
1345
1346
1347
1348
1350 const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {
1351 if (isDefinedInThisLoop(Reg))
1353 };
1355 if (MI->isPHI()) {
1357 UpdateTargetRegs(Reg);
1358 } else {
1359 for (auto &Use : ROMap.find(MI)->getSecond().Uses)
1360 UpdateTargetRegs(Use.RegUnit);
1361 }
1362 }
1363
1364 const auto InstrScore = [&Stages](MachineInstr *MI) {
1365 return Stages[MI] + MI->isPHI();
1366 };
1367
1370 for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
1373 continue;
1375 if (!Inserted) {
1378 if (InstrScore(Orig) < InstrScore(New))
1379 Ite->second = New;
1380 }
1381 }
1382 }
1383
1384 Instr2LastUsesTy LastUses;
1385 for (auto &Entry : LastUseMI)
1386 LastUses[Entry.second].insert(Entry.first);
1387 return LastUses;
1388 }
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402 std::vector
1403 computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,
1404 Instr2StageTy &Stages,
1405 const unsigned StageCount) const {
1407
1408
1409
1411
1412 auto CurSetPressure = InitSetPressure;
1413 auto MaxSetPressure = InitSetPressure;
1414 auto LastUses = computeLastUses(OrderedInsts, Stages);
1415
1417 dbgs() << "Ordered instructions:\n";
1419 dbgs() << "Stage " << Stages[MI] << ": ";
1421 }
1422 });
1423
1424 const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,
1426 if (.isValid() || isReservedRegister(Reg))
1427 return;
1428
1429 bool Inserted = RegSet.insert(Reg).second;
1430 if (!Inserted)
1431 return;
1432
1434 increaseRegisterPressure(CurSetPressure, Reg);
1436 };
1437
1438 const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,
1440 if (.isValid() || isReservedRegister(Reg))
1441 return;
1442
1443
1444 if (!RegSet.contains(Reg))
1445 return;
1446
1448 RegSet.erase(Reg);
1449 decreaseRegisterPressure(CurSetPressure, Reg);
1451 };
1452
1453 for (unsigned I = 0; I < StageCount; I++) {
1455 const auto Stage = Stages[MI];
1456 if (I < Stage)
1457 continue;
1458
1459 const unsigned Iter = I - Stage;
1460
1461 for (auto &Def : ROMap.find(MI)->getSecond().Defs)
1462 InsertReg(LiveRegSets[Iter], Def.RegUnit);
1463
1464 for (auto LastUse : LastUses[MI]) {
1465 if (MI->isPHI()) {
1466 if (Iter != 0)
1467 EraseReg(LiveRegSets[Iter - 1], LastUse);
1468 } else {
1469 EraseReg(LiveRegSets[Iter], LastUse);
1470 }
1471 }
1472
1473 for (unsigned PSet = 0; PSet < PSetNum; PSet++)
1474 MaxSetPressure[PSet] =
1475 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1476
1478 dbgs() << "CurSetPressure=";
1479 dumpRegisterPressures(CurSetPressure);
1480 dbgs() << " iter=" << Iter << " stage=" << Stage << ":";
1482 });
1483 }
1484 }
1485
1486 return MaxSetPressure;
1487 }
1488
1489public:
1492 : OrigMBB(OrigMBB), MRI(MF.getRegInfo()),
1493 TRI(MF.getSubtarget().getRegisterInfo()),
1494 PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1495 PressureSetLimit(PSetNum, 0) {}
1496
1497
1498
1501 if (MI.isDebugInstr())
1502 continue;
1503 ROMap[&MI].collect(MI, *TRI, MRI, false, true);
1504 }
1505
1506 computeLiveIn();
1507 computePressureSetLimit(RCI);
1508 }
1509
1510
1511
1513 const unsigned MaxStage) const {
1515 "the percentage of the margin must be between 0 to 100");
1516
1517 OrderedInstsTy OrderedInsts;
1518 Instr2StageTy Stages;
1520 const auto MaxSetPressure =
1521 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1522
1524 dbgs() << "Dump MaxSetPressure:\n";
1525 for (unsigned I = 0; I < MaxSetPressure.size(); I++) {
1526 dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);
1527 }
1528 dbgs() << '\n';
1529 });
1530
1531 for (unsigned PSet = 0; PSet < PSetNum; PSet++) {
1532 unsigned Limit = PressureSetLimit[PSet];
1534 LLVM_DEBUG(dbgs() << "PSet=" << PSet << " Limit=" << Limit
1535 << " Margin=" << Margin << "\n");
1536 if (Limit < MaxSetPressure[PSet] + Margin) {
1539 << "Rejected the schedule because of too high register pressure\n");
1540 return true;
1541 }
1542 }
1543 return false;
1544 }
1545};
1546
1547}
1548
1549
1550
1551
1552
1553
1554
1555unsigned SwingSchedulerDAG::calculateResMII() {
1558 return RM.calculateResMII();
1559}
1560
1561
1562
1563
1564
1565
1566
1567unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1568 unsigned RecMII = 0;
1569
1570 for (NodeSet &Nodes : NodeSets) {
1571 if (Nodes.empty())
1572 continue;
1573
1574 unsigned Delay = Nodes.getLatency();
1575 unsigned Distance = 1;
1576
1577
1578 unsigned CurMII = (Delay + Distance - 1) / Distance;
1579 Nodes.setRecMII(CurMII);
1580 if (CurMII > RecMII)
1581 RecMII = CurMII;
1582 }
1583
1584 return RecMII;
1585}
1586
1587
1588void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1592 for (int i = 0, e = SUnits.size(); i != e; ++i) {
1594
1595 for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1596
1597
1598 if (OE.isOutputDep()) {
1599 int N = OE.getDst()->NodeNum;
1600 int BackEdge = i;
1601 auto Dep = OutputDeps.find(BackEdge);
1602 if (Dep != OutputDeps.end()) {
1603 BackEdge = Dep->second;
1604 OutputDeps.erase(Dep);
1605 }
1606 OutputDeps[N] = BackEdge;
1607 }
1608
1609 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1610 continue;
1611
1612
1613
1614
1615
1616
1617
1618 if (OE.isAntiDep())
1619 continue;
1620
1621 int N = OE.getDst()->NodeNum;
1622 if (.test(N)) {
1623 AdjK[i].push_back(N);
1625 }
1626 }
1627
1628
1629 for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1630 SUnit *Src = IE.getSrc();
1631 SUnit *Dst = IE.getDst();
1632 if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE))
1633 continue;
1634 if (IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1635 int N = Src->NodeNum;
1636 if (.test(N)) {
1637 AdjK[i].push_back(N);
1639 }
1640 }
1641 }
1642 }
1643
1644 for (auto &OD : OutputDeps)
1645 if (.test(OD.second)) {
1646 AdjK[OD.first].push_back(OD.second);
1647 Added.set(OD.second);
1648 }
1649}
1650
1651
1652
1653bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1655 bool HasBackedge) {
1657 bool F = false;
1658 Stack.insert(SV);
1659 Blocked.set(V);
1660
1661 for (auto W : AdjK[V]) {
1662 if (NumPaths > MaxPaths)
1663 break;
1664 if (W < S)
1665 continue;
1666 if (W == S) {
1667 if (!HasBackedge)
1669 F = true;
1670 ++NumPaths;
1671 break;
1672 }
1673 if (!Blocked.test(W)) {
1674 if (circuit(W, S, NodeSets, DAG,
1675 Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1676 F = true;
1677 }
1678 }
1679
1680 if (F)
1681 unblock(V);
1682 else {
1683 for (auto W : AdjK[V]) {
1684 if (W < S)
1685 continue;
1687 }
1688 }
1689 Stack.pop_back();
1690 return F;
1691}
1692
1693
1694void SwingSchedulerDAG::Circuits::unblock(int U) {
1695 Blocked.reset(U);
1697 while (!BU.empty()) {
1699 assert(SI != BU.end() && "Invalid B set.");
1702 if (Blocked.test(W->NodeNum))
1703 unblock(W->NodeNum);
1704 }
1705}
1706
1707
1708
1709void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1710 Circuits Cir(SUnits, Topo);
1711
1712 Cir.createAdjacencyStructure(this);
1713 for (int I = 0, E = SUnits.size(); I != E; ++I) {
1714 Cir.reset();
1715 Cir.circuit(I, I, NodeSets, this);
1716 }
1717}
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
1739
1741 continue;
1742
1743
1745
1747
1748 for (auto &Dep : SU.Preds) {
1749 SUnit *TmpSU = Dep.getSUnit();
1752
1755
1756
1759 }
1760
1761 if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1762 continue;
1763
1764
1765
1767
1768 for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
1769 for (auto &Dep : PHISUs[Index]->Succs) {
1771 continue;
1772
1773 SUnit *TmpSU = Dep.getSUnit();
1777 continue;
1778 }
1780 }
1781 }
1782
1783 if (UseSUs.size() == 0)
1784 continue;
1785
1787
1788 for (auto *I : UseSUs) {
1789 for (auto *Src : SrcSUs) {
1790 if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1793 }
1794 }
1795 }
1796 }
1797}
1798
1799
1800
1801
1802
1803
1804
1805void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1806 ScheduleInfo.resize(SUnits.size());
1807
1809 for (int I : Topo) {
1812 }
1813 });
1814
1815 int maxASAP = 0;
1816
1817 for (int I : Topo) {
1818 int asap = 0;
1819 int zeroLatencyDepth = 0;
1821 for (const auto &IE : DDG->getInEdges(SU)) {
1822 SUnit *Pred = IE.getSrc();
1823 if (IE.getLatency() == 0)
1824 zeroLatencyDepth =
1826 if (IE.ignoreDependence(true))
1827 continue;
1828 asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() -
1829 IE.getDistance() * MII));
1830 }
1831 maxASAP = std::max(maxASAP, asap);
1832 ScheduleInfo[I].ASAP = asap;
1833 ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
1834 }
1835
1836
1838 int alap = maxASAP;
1839 int zeroLatencyHeight = 0;
1841 for (const auto &OE : DDG->getOutEdges(SU)) {
1842 SUnit *Succ = OE.getDst();
1844 continue;
1845 if (OE.getLatency() == 0)
1846 zeroLatencyHeight =
1848 if (OE.ignoreDependence(true))
1849 continue;
1850 alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() +
1851 OE.getDistance() * MII));
1852 }
1853
1854 ScheduleInfo[I].ALAP = alap;
1855 ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
1856 }
1857
1858
1860 I.computeNodeSetInfo(this);
1861
1863 for (unsigned i = 0; i < SUnits.size(); i++) {
1864 dbgs() << "\tNode " << i << ":\n";
1872 }
1873 });
1874}
1875
1876
1877
1878
1881 const NodeSet *S = nullptr) {
1883
1885 for (const auto &IE : DDG->getInEdges(SU)) {
1886 SUnit *PredSU = IE.getSrc();
1887 if (S && S->count(PredSU) == 0)
1888 continue;
1889 if (IE.ignoreDependence(true))
1890 continue;
1891 if (NodeOrder.count(PredSU) == 0)
1892 Preds.insert(PredSU);
1893 }
1894
1895
1896
1897
1898
1899 for (const auto &OE : DDG->getOutEdges(SU)) {
1900 SUnit *SuccSU = OE.getDst();
1901 if (!OE.isAntiDep())
1902 continue;
1903 if (S && S->count(SuccSU) == 0)
1904 continue;
1905 if (NodeOrder.count(SuccSU) == 0)
1906 Preds.insert(SuccSU);
1907 }
1908 }
1909 return !Preds.empty();
1910}
1911
1912
1913
1914
1917 const NodeSet *S = nullptr) {
1919
1921 for (const auto &OE : DDG->getOutEdges(SU)) {
1922 SUnit *SuccSU = OE.getDst();
1923 if (S && S->count(SuccSU) == 0)
1924 continue;
1925 if (OE.ignoreDependence(false))
1926 continue;
1927 if (NodeOrder.count(SuccSU) == 0)
1928 Succs.insert(SuccSU);
1929 }
1930
1931
1932
1933
1934
1935 for (const auto &IE : DDG->getInEdges(SU)) {
1936 SUnit *PredSU = IE.getSrc();
1937 if (!IE.isAntiDep())
1938 continue;
1939 if (S && S->count(PredSU) == 0)
1940 continue;
1941 if (NodeOrder.count(PredSU) == 0)
1942 Succs.insert(PredSU);
1943 }
1944 }
1945 return !Succs.empty();
1946}
1947
1948
1949
1956 return false;
1958 return false;
1959 if (DestNodes.contains(Cur))
1960 return true;
1961 if (!Visited.insert(Cur).second)
1962 return Path.contains(Cur);
1963 bool FoundPath = false;
1964 for (const auto &OE : DDG->getOutEdges(Cur))
1965 if (!OE.ignoreDependence(false))
1966 FoundPath |=
1967 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
1968 for (const auto &IE : DDG->getInEdges(Cur))
1969 if (IE.isAntiDep() && IE.getDistance() == 0)
1970 FoundPath |=
1971 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
1972 if (FoundPath)
1973 Path.insert(Cur);
1974 return FoundPath;
1975}
1976
1977
1978
1979
1986 for (SUnit *SU : NS) {
1988 if (MI->isPHI())
1989 continue;
1992 if (Reg.isVirtual())
1993 Uses.insert(Reg);
1994 else if (MRI.isAllocatable(Reg))
1995 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
1996 Uses.insert(Unit);
1997 }
1998 }
1999 for (SUnit *SU : NS)
2001 if (!MO.isDead()) {
2003 if (Reg.isVirtual()) {
2004 if (.count(Reg))
2006 } else if (MRI.isAllocatable(Reg)) {
2007 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
2008 if (.count(Unit))
2010 }
2011 }
2013}
2014
2015
2016
2017void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2018 for (auto &NS : NodeSets) {
2019
2020 if (NS.size() <= 2)
2021 continue;
2024 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
2026 RecRPTracker.closeBottom();
2027
2028 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2030 return A->NodeNum > B->NodeNum;
2031 });
2032
2033 for (auto &SU : SUnits) {
2034
2035
2036
2037
2039 RecRPTracker.setPos(std::next(CurInstI));
2040
2043 RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
2044 CriticalPSets,
2048 dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
2051 NS.setExceedPressure(SU);
2052 break;
2053 }
2054 RecRPTracker.recede();
2055 }
2056 }
2057}
2058
2059
2060
2061void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2062 unsigned Colocate = 0;
2063 for (int i = 0, e = NodeSets.size(); i < e; ++i) {
2064 NodeSet &N1 = NodeSets[i];
2067 continue;
2068 for (int j = i + 1; j < e; ++j) {
2071 continue;
2073 if (N2.empty() || (N2, S2, DDG.get()))
2074 continue;
2078 break;
2079 }
2080 }
2081 }
2082}
2083
2084
2085
2086
2087
2088
2089void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2090
2091 if (MII < 17)
2092 return;
2093
2094 for (auto &NS : NodeSets) {
2095 if (NS.getRecMII() > 2)
2096 return;
2097 if (NS.getMaxDepth() > MII)
2098 return;
2099 }
2100 NodeSets.clear();
2101 LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
2102}
2103
2104
2105
2106void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2109
2110
2111 for (NodeSet &I : NodeSets) {
2113
2114 if (succ_L(I, N, DDG.get())) {
2117 Visited.clear();
2118 computePath(NI, Path, NodesAdded, I, Visited, DDG.get());
2119 }
2120 if (.empty())
2121 I.insert(Path.begin(), Path.end());
2122 }
2123
2124 N.clear();
2125 if (succ_L(NodesAdded, N, DDG.get())) {
2128 Visited.clear();
2129 computePath(NI, Path, I, NodesAdded, Visited, DDG.get());
2130 }
2131 if (.empty())
2132 I.insert(Path.begin(), Path.end());
2133 }
2134 NodesAdded.insert(I.begin(), I.end());
2135 }
2136
2137
2138
2141 if (succ_L(NodesAdded, N, DDG.get()))
2143 addConnectedNodes(I, NewSet, NodesAdded);
2144 if (!NewSet.empty())
2145 NodeSets.push_back(NewSet);
2146
2147
2148
2150 if (pred_L(NodesAdded, N, DDG.get()))
2152 addConnectedNodes(I, NewSet, NodesAdded);
2153 if (!NewSet.empty())
2154 NodeSets.push_back(NewSet);
2155
2156
2157
2159 if (NodesAdded.count(&SU) == 0) {
2161 addConnectedNodes(&SU, NewSet, NodesAdded);
2162 if (!NewSet.empty())
2163 NodeSets.push_back(NewSet);
2164 }
2165 }
2166}
2167
2168
2169void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2172 NodesAdded.insert(SU);
2173 for (auto &OE : DDG->getOutEdges(SU)) {
2175 if (!OE.isArtificial() && ->isBoundaryNode() &&
2177 addConnectedNodes(Successor, NewSet, NodesAdded);
2178 }
2179 for (auto &IE : DDG->getInEdges(SU)) {
2180 SUnit *Predecessor = IE.getSrc();
2181 if (.isArtificial() && NodesAdded.count(Predecessor) == 0)
2182 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2183 }
2184}
2185
2186
2187
2190 Result.clear();
2191 for (SUnit *SU : Set1) {
2192 if (Set2.count(SU) != 0)
2193 Result.insert(SU);
2194 }
2195 return !Result.empty();
2196}
2197
2198
2199void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2201 ++I) {
2208 for (SUnit *SU : *J)
2209 I->insert(SU);
2210 NodeSets.erase(J);
2211 E = NodeSets.end();
2212 } else {
2213 ++J;
2214 }
2215 }
2216 }
2217}
2218
2219
2220void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2222 ++I)
2224 J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
2225
2226 if (J->empty()) {
2227 NodeSets.erase(J);
2228 E = NodeSets.end();
2229 } else {
2230 ++J;
2231 }
2232 }
2233}
2234
2235
2236
2237
2238
2239void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2241 NodeOrder.clear();
2242
2243 for (auto &Nodes : NodeSets) {
2244 LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2245 OrderKind Order;
2248 R.insert(N.begin(), N.end());
2249 Order = BottomUp;
2251 } else if (succ_L(NodeOrder, N, DDG.get()) &&
2253 R.insert(N.begin(), N.end());
2254 Order = TopDown;
2257
2258
2259 Order = TopDown;
2261 } else if (NodeSets.size() == 1) {
2262 for (const auto &N : Nodes)
2263 if (N->Succs.size() == 0)
2265 Order = BottomUp;
2267 } else {
2268
2269 SUnit *maxASAP = nullptr;
2270 for (SUnit *SU : Nodes) {
2271 if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
2273 maxASAP = SU;
2274 }
2275 R.insert(maxASAP);
2276 Order = BottomUp;
2278 }
2279
2280 while (.empty()) {
2281 if (Order == TopDown) {
2282
2283
2284
2285 while (.empty()) {
2286 SUnit *maxHeight = nullptr;
2289 maxHeight = I;
2292 maxHeight = I;
2297 maxHeight = I;
2298 }
2299 NodeOrder.insert(maxHeight);
2301 R.remove(maxHeight);
2302 for (const auto &OE : DDG->getOutEdges(maxHeight)) {
2303 SUnit *SU = OE.getDst();
2304 if (Nodes.count(SU) == 0)
2305 continue;
2306 if (NodeOrder.contains(SU))
2307 continue;
2308 if (OE.ignoreDependence(false))
2309 continue;
2310 R.insert(SU);
2311 }
2312
2313
2314
2315
2316
2317 for (const auto &IE : DDG->getInEdges(maxHeight)) {
2319 if (.isAntiDep())
2320 continue;
2321 if (Nodes.count(SU) == 0)
2322 continue;
2323 if (NodeOrder.contains(SU))
2324 continue;
2325 R.insert(SU);
2326 }
2327 }
2328 Order = BottomUp;
2329 LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
2331 if (pred_L(NodeOrder, N, DDG.get(), &Nodes))
2332 R.insert(N.begin(), N.end());
2333 } else {
2334
2335
2336
2337 while (.empty()) {
2338 SUnit *maxDepth = nullptr;
2341 maxDepth = I;
2344 maxDepth = I;
2348 maxDepth = I;
2349 }
2350 NodeOrder.insert(maxDepth);
2352 R.remove(maxDepth);
2353 if (Nodes.isExceedSU(maxDepth)) {
2354 Order = TopDown;
2355 R.clear();
2356 R.insert(Nodes.getNode(0));
2357 break;
2358 }
2359 for (const auto &IE : DDG->getInEdges(maxDepth)) {
2361 if (Nodes.count(SU) == 0)
2362 continue;
2363 if (NodeOrder.contains(SU))
2364 continue;
2365 R.insert(SU);
2366 }
2367
2368
2369
2370
2371
2372 for (const auto &OE : DDG->getOutEdges(maxDepth)) {
2373 SUnit *SU = OE.getDst();
2374 if (!OE.isAntiDep())
2375 continue;
2376 if (Nodes.count(SU) == 0)
2377 continue;
2378 if (NodeOrder.contains(SU))
2379 continue;
2380 R.insert(SU);
2381 }
2382 }
2383 Order = TopDown;
2384 LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2386 if (succ_L(NodeOrder, N, DDG.get(), &Nodes))
2387 R.insert(N.begin(), N.end());
2388 }
2389 }
2391 }
2392
2394 dbgs() << "Node order: ";
2395 for (SUnit *I : NodeOrder)
2396 dbgs() << " " << I->NodeNum << " ";
2397 dbgs() << "\n";
2398 });
2399}
2400
2401
2402
2403bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2404
2405 if (NodeOrder.empty()){
2406 LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
2407 return false;
2408 }
2409
2410 bool scheduleFound = false;
2411 std::unique_ptr HRPDetector;
2413 HRPDetector =
2414 std::make_unique(Loop.getHeader(), MF);
2415 HRPDetector->init(RegClassInfo);
2416 }
2417
2418 for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2419 Schedule.reset();
2421 LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2422
2425 do {
2426 SUnit *SU = *NI;
2427
2428
2429
2430 int EarlyStart = INT_MIN;
2431 int LateStart = INT_MAX;
2432 Schedule.computeStart(SU, &EarlyStart, &LateStart, II, this);
2434 dbgs() << "\n";
2435 dbgs() << "Inst (" << SU->NodeNum << ") ";
2437 dbgs() << "\n";
2438 });
2440 dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2441
2442 if (EarlyStart > LateStart)
2443 scheduleFound = false;
2444 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2445 scheduleFound =
2446 Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);
2447 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2448 scheduleFound =
2449 Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);
2450 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2451 LateStart = std::min(LateStart, EarlyStart + (int)II - 1);
2452
2453
2454
2455
2456
2457
2460 scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
2461 else
2462 scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);
2463 } else {
2465 scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2467 }
2468
2469
2470
2471 if (scheduleFound)
2474 scheduleFound = false;
2475
2477 if (!scheduleFound)
2478 dbgs() << "\tCan't schedule\n";
2479 });
2480 } while (++NI != NE && scheduleFound);
2481
2482
2483 if (scheduleFound)
2484 scheduleFound =
2486
2487
2488 if (scheduleFound)
2490
2491
2492
2494 scheduleFound =
2495 !HRPDetector->detect(this, Schedule, Schedule.getMaxStageCount());
2496 }
2497
2498 LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2500 << ")\n");
2501
2502 if (scheduleFound) {
2503 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
2504 if (!scheduleFound)
2506 }
2507
2508 if (scheduleFound) {
2510 Pass.ORE->emit([&]() {
2513 << "Schedule found with Initiation Interval: "
2515 << ", MaxStageCount: "
2517 });
2518 } else
2519 Schedule.reset();
2520
2522}
2523
2524
2525
2526bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {
2530 bool OffsetIsScalable;
2532 return false;
2533
2534
2535 if (OffsetIsScalable)
2536 return false;
2537
2538 if (!BaseOp->isReg())
2539 return false;
2540
2542
2544
2546 if (BaseDef && BaseDef->isPHI()) {
2549 }
2550 if (!BaseDef)
2551 return false;
2552
2553 int D = 0;
2555 return false;
2556
2557 Delta = D;
2558 return true;
2559}
2560
2561
2562
2563
2564
2565
2566
2567
2568bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2569 unsigned &BasePos,
2570 unsigned &OffsetPos,
2571 unsigned &NewBase,
2573
2575 return false;
2576 unsigned BasePosLd, OffsetPosLd;
2578 return false;
2579 Register BaseReg = MI->getOperand(BasePosLd).getReg();
2580
2581
2584 if (!Phi || ->isPHI())
2585 return false;
2586
2587 unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2588 if (!PrevReg)
2589 return false;
2590
2591
2593 if (!PrevDef || PrevDef == MI)
2594 return false;
2595
2597 return false;
2598
2599 unsigned BasePos1 = 0, OffsetPos1 = 0;
2601 return false;
2602
2603
2604
2605 int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2606 int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2608 NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2611 if (!Disjoint)
2612 return false;
2613
2614
2615 BasePos = BasePosLd;
2616 OffsetPos = OffsetPosLd;
2617 NewBase = PrevReg;
2618 Offset = StoreOffset;
2619 return true;
2620}
2621
2622
2623
2628 InstrChanges.find(SU);
2629 if (It != InstrChanges.end()) {
2630 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2631 unsigned BasePos, OffsetPos;
2633 return;
2634 Register BaseReg = MI->getOperand(BasePos).getReg();
2635 MachineInstr *LoopDef = findDefInLoop(BaseReg);
2640 if (BaseStageNum < DefStageNum) {
2642 int OffsetDiff = DefStageNum - BaseStageNum;
2643 if (DefCycleNum < BaseCycleNum) {
2645 if (OffsetDiff > 0)
2646 --OffsetDiff;
2647 }
2648 int64_t NewOffset =
2649 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2653 NewMIs[MI] = NewMI;
2654 }
2655 }
2656}
2657
2658
2659
2660
2664 while (Def->isPHI()) {
2665 if (!Visited.insert(Def).second)
2666 break;
2667 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2668 if (Def->getOperand(i + 1).getMBB() == BB) {
2669 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2670 break;
2671 }
2672 }
2673 return Def;
2674}
2675
2676
2677
2678
2683 return false;
2684
2686 return true;
2687
2689 return true;
2690
2693 assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2694
2695
2699 return true;
2700
2702 return false;
2703
2704
2705
2706
2707 unsigned DeltaS, DeltaD;
2708 if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2709 return true;
2710
2712 int64_t OffsetS, OffsetD;
2713 bool OffsetSIsScalable, OffsetDIsScalable;
2719 return true;
2720
2721 assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2722 "Expected offsets to be byte offsets");
2723
2726 if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
2727 return true;
2728
2729 unsigned InitValS = 0;
2730 unsigned LoopValS = 0;
2731 unsigned InitValD = 0;
2732 unsigned LoopValD = 0;
2737
2739 return true;
2740
2741
2742
2744 int D = 0;
2746 return true;
2747
2750
2751
2752
2754 return true;
2755
2756 if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() ||
2757 DeltaD < AccessSizeD.getValue())
2758 return true;
2759
2760 return (OffsetS + (int64_t)AccessSizeS.getValue() <
2761 OffsetD + (int64_t)AccessSizeD.getValue());
2762}
2763
2764void SwingSchedulerDAG::postProcessDAG() {
2765 for (auto &M : Mutations)
2766 M->apply(this);
2767}
2768
2769
2770
2771
2772
2773
2775 bool forward = true;
2777 dbgs() << "Trying to insert node between " << StartCycle << " and "
2778 << EndCycle << " II: " << II << "\n";
2779 });
2780 if (StartCycle > EndCycle)
2781 forward = false;
2782
2783
2784 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2785 for (int curCycle = StartCycle; curCycle != termCycle;
2786 forward ? ++curCycle : --curCycle) {
2787
2788 if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2791 dbgs() << "\tinsert at cycle " << curCycle << " ";
2793 });
2794
2795 if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
2796 ProcItinResources.reserveResources(*SU, curCycle);
2797 ScheduledInstrs[curCycle].push_back(SU);
2798 InstrToCycle.insert(std::make_pair(SU, curCycle));
2799 if (curCycle > LastCycle)
2800 LastCycle = curCycle;
2801 if (curCycle < FirstCycle)
2802 FirstCycle = curCycle;
2803 return true;
2804 }
2806 dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2808 });
2809 }
2810 return false;
2811}
2812
2813
2819 int EarlyCycle = INT_MAX;
2820 while (!Worklist.empty()) {
2823 if (Visited.count(PrevSU))
2824 continue;
2825 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2826 if (it == InstrToCycle.end())
2827 continue;
2828 EarlyCycle = std::min(EarlyCycle, it->second);
2829 for (const auto &IE : DDG->getInEdges(PrevSU))
2830 if (IE.isOrderDep() || IE.isOutputDep())
2832 Visited.insert(PrevSU);
2833 }
2834 return EarlyCycle;
2835}
2836
2837
2843 int LateCycle = INT_MIN;
2844 while (!Worklist.empty()) {
2848 continue;
2849 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2850 if (it == InstrToCycle.end())
2851 continue;
2852 LateCycle = std::max(LateCycle, it->second);
2853 for (const auto &OE : DDG->getOutEdges(SuccSU))
2854 if (OE.isOrderDep() || OE.isOutputDep())
2856 Visited.insert(SuccSU);
2857 }
2858 return LateCycle;
2859}
2860
2861
2862
2863
2865 for (auto &P : SU->Preds)
2866 if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI())
2867 for (auto &S : P.getSUnit()->Succs)
2868 if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2869 return P.getSUnit();
2870 return nullptr;
2871}
2872
2873
2874
2878
2879
2880
2881
2882 for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2884 for (const auto &IE : DDG->getInEdges(SU)) {
2885 if (IE.getSrc() == I) {
2886
2887
2890 *MinLateStart = std::min(*MinLateStart, End);
2891 }
2892 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II;
2893 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2894 }
2895 }
2896
2897 for (const auto &OE : DDG->getOutEdges(SU)) {
2898 if (OE.getDst() == I) {
2899
2900
2903 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
2904 }
2905 int LateStart = cycle - OE.getLatency() + OE.getDistance() * II;
2906 *MinLateStart = std::min(*MinLateStart, LateStart);
2907 }
2908 }
2909
2911 for (const auto &Dep : SU->Preds) {
2912
2913
2914 if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2916 *MinLateStart = std::min(*MinLateStart, cycle);
2917 }
2918 }
2919 }
2920}
2921
2922
2923
2924
2926 std::deque<SUnit *> &Insts) const {
2928 bool OrderBeforeUse = false;
2929 bool OrderAfterDef = false;
2930 bool OrderBeforeDef = false;
2931 unsigned MoveDef = 0;
2932 unsigned MoveUse = 0;
2935
2936 unsigned Pos = 0;
2937 for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2938 ++I, ++Pos) {
2940 if (!MO.isReg() || !MO.getReg().isVirtual())
2941 continue;
2942
2944 unsigned BasePos, OffsetPos;
2945 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2946 if (MI->getOperand(BasePos).getReg() == Reg)
2948 Reg = NewReg;
2950 std::tie(Reads, Writes) =
2951 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2952 if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2953 OrderBeforeUse = true;
2954 if (MoveUse == 0)
2955 MoveUse = Pos;
2956 } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2957
2958 OrderAfterDef = true;
2959 MoveDef = Pos;
2962 OrderBeforeUse = true;
2963 if (MoveUse == 0)
2964 MoveUse = Pos;
2965 } else {
2966 OrderAfterDef = true;
2967 MoveDef = Pos;
2968 }
2970 OrderBeforeUse = true;
2971 if (MoveUse == 0)
2972 MoveUse = Pos;
2973 if (MoveUse != 0) {
2974 OrderAfterDef = true;
2975 MoveDef = Pos - 1;
2976 }
2978
2979 OrderBeforeUse = true;
2980 if (MoveUse == 0)
2981 MoveUse = Pos;
2982 } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2984 if (MoveUse == 0) {
2985 OrderBeforeDef = true;
2986 MoveUse = Pos;
2987 }
2988 }
2989 }
2990
2991
2993 if (OE.getDst() != *I)
2994 continue;
2995 if (OE.isOrderDep() && stageScheduled(*I) == StageInst1) {
2996 OrderBeforeUse = true;
2997 if (Pos < MoveUse)
2998 MoveUse = Pos;
2999 }
3000
3001
3002
3003 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3005 OrderBeforeUse = true;
3006 if ((MoveUse == 0) || (Pos < MoveUse))
3007 MoveUse = Pos;
3008 }
3009 }
3010 for (auto &IE : DDG->getInEdges(SU)) {
3011 if (IE.getSrc() != *I)
3012 continue;
3013 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3015 OrderAfterDef = true;
3016 MoveDef = Pos;
3017 }
3018 }
3019 }
3020
3021
3022 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3023 OrderBeforeUse = false;
3024
3025
3026
3027 if (OrderBeforeDef)
3028 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3029
3030
3031
3032 if (OrderBeforeUse && OrderAfterDef) {
3033 SUnit *UseSU = Insts.at(MoveUse);
3034 SUnit *DefSU = Insts.at(MoveDef);
3035 if (MoveUse > MoveDef) {
3036 Insts.erase(Insts.begin() + MoveUse);
3037 Insts.erase(Insts.begin() + MoveDef);
3038 } else {
3039 Insts.erase(Insts.begin() + MoveDef);
3040 Insts.erase(Insts.begin() + MoveUse);
3041 }
3045 return;
3046 }
3047
3048
3049 if (OrderBeforeUse)
3050 Insts.push_front(SU);
3051 else
3052 Insts.push_back(SU);
3053}
3054
3055
3058 if (!Phi.isPHI())
3059 return false;
3060 assert(Phi.isPHI() && "Expecting a Phi.");
3064
3065 unsigned InitVal = 0;
3066 unsigned LoopVal = 0;
3067 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3069 if (!UseSU)
3070 return true;
3072 return true;
3075 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3076}
3077
3078
3079
3080
3081
3082
3083
3084
3088 if (!MO.isReg())
3089 return false;
3090 if (Def->isPHI())
3091 return false;
3093 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3094 return false;
3096 return false;
3097 unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3099 if (DMO.getReg() == LoopReg)
3100 return true;
3101 }
3102 return false;
3103}
3104
3105
3106
3109 for (const auto &IE : DDG->getInEdges(SU))
3110 if (InstrToCycle.count(IE.getSrc()))
3111 return false;
3112 return true;
3113}
3114
3115
3120
3121 for (auto &SU : SSD->SUnits)
3124
3126 while (!Worklist.empty()) {
3128 if (DoNotPipeline.count(SU))
3129 continue;
3131 DoNotPipeline.insert(SU);
3132 for (const auto &IE : DDG->getInEdges(SU))
3133 Worklist.push_back(IE.getSrc());
3134
3135
3136
3137 for (const auto &OE : DDG->getOutEdges(SU))
3138 if (OE.getDistance() == 1)
3139 Worklist.push_back(OE.getDst());
3140 }
3141 return DoNotPipeline;
3142}
3143
3144
3145
3149
3150 int NewLastCycle = INT_MIN;
3153 continue;
3155 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3156 continue;
3157 }
3158
3159
3162 if (IE.getDistance() == 0)
3163 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3164
3165
3166
3168 if (OE.getDistance() == 1)
3169 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3170
3171 int OldCycle = InstrToCycle[&SU];
3172 if (OldCycle != NewCycle) {
3173 InstrToCycle[&SU] = NewCycle;
3178 << ") is not pipelined; moving from cycle " << OldCycle
3179 << " to " << NewCycle << " Instr:" << *SU.getInstr());
3180 }
3181 NewLastCycle = std::max(NewLastCycle, NewCycle);
3182 }
3183 LastCycle = NewLastCycle;
3184 return true;
3185}
3186
3187
3188
3189
3190
3191
3192
3193
3194
3198 continue;
3200 int CycleDef = InstrToCycle[&SU];
3201 assert(StageDef != -1 && "Instruction should have been scheduled.");
3203 SUnit *Dst = OE.getDst();
3204 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3205 if (OE.getReg().isPhysical()) {
3207 return false;
3208 if (InstrToCycle[Dst] <= CycleDef)
3209 return false;
3210 }
3211 }
3212 }
3213 return true;
3214}
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3227
3228
3229 typedef std::pair<SUnit *, unsigned> UnitIndex;
3230 std::vector Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3231
3232 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3233 Indices.push_back(std::make_pair(NodeOrder[i], i));
3234
3235 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3236 return std::get<0>(i1) < std::get<0>(i2);
3237 };
3238
3239
3241
3242 bool Valid = true;
3243 (void)Valid;
3244
3245
3246
3247
3248
3249 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3250 SUnit *SU = NodeOrder[i];
3251 unsigned Index = i;
3252
3253 bool PredBefore = false;
3254 bool SuccBefore = false;
3255
3258 (void)Succ;
3259 (void)Pred;
3260
3261 for (const auto &IE : DDG->getInEdges(SU)) {
3262 SUnit *PredSU = IE.getSrc();
3263 unsigned PredIndex = std::get<1>(
3264 *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
3265 if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3266 PredBefore = true;
3267 Pred = PredSU;
3268 break;
3269 }
3270 }
3271
3272 for (const auto &OE : DDG->getOutEdges(SU)) {
3273 SUnit *SuccSU = OE.getDst();
3274
3275
3276
3278 continue;
3279 unsigned SuccIndex = std::get<1>(
3280 *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
3281 if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3282 SuccBefore = true;
3283 Succ = SuccSU;
3284 break;
3285 }
3286 }
3287
3288 if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3289
3290
3292 Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3293 if (InCircuit)
3295 else {
3296 Valid = false;
3297 NumNodeOrderIssues++;
3299 }
3301 << " are scheduled before node " << SU->NodeNum
3302 << "\n");
3303 }
3304 }
3305
3307 if (!Valid)
3308 dbgs() << "Invalid node order found!\n";
3309 });
3310}
3311
3312
3313
3314
3315
3316
3317
3319 unsigned OverlapReg = 0;
3320 unsigned NewBaseReg = 0;
3321 for (SUnit *SU : Instrs) {
3323 for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3325
3326
3327 if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3328
3329
3331 InstrChanges.find(SU);
3332 if (It != InstrChanges.end()) {
3333 unsigned BasePos, OffsetPos;
3334
3338 int64_t NewOffset =
3339 MI->getOperand(OffsetPos).getImm() - It->second.second;
3343 NewMIs[MI] = NewMI;
3344 }
3345 }
3346 OverlapReg = 0;
3347 NewBaseReg = 0;
3348 break;
3349 }
3350
3351
3352 unsigned TiedUseIdx = 0;
3353 if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3354
3355 OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3356
3357 NewBaseReg = MI->getOperand(i).getReg();
3358 break;
3359 }
3360 }
3361 }
3362}
3363
3364std::deque<SUnit *>
3366 const std::deque<SUnit *> &Instrs) const {
3367 std::deque<SUnit *> NewOrderPhi;
3368 for (SUnit *SU : Instrs) {
3370 NewOrderPhi.push_back(SU);
3371 }
3372 std::deque<SUnit *> NewOrderI;
3373 for (SUnit *SU : Instrs) {
3376 }
3378 return NewOrderPhi;
3379}
3380
3381
3382
3383
3385
3387 for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3388 ++stage) {
3389 std::deque<SUnit *> &cycleInstrs =
3390 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3392 ScheduledInstrs[cycle].push_front(SU);
3393 }
3394 }
3395
3396
3397
3398 for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3399 ScheduledInstrs.erase(cycle);
3400
3401
3402
3405
3406
3407
3409 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3412 }
3413
3415}
3416
3418 os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3419 << " depth " << MaxDepth << " col " << Colocate << "\n";
3420 for (const auto &I : Nodes)
3421 os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3422 os << "\n";
3423}
3424
3425#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3426
3428
3430
3432 for (SUnit *CI : cycleInstrs->second) {
3433 os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3434 os << "(" << CI->NodeNum << ") ";
3436 os << "\n";
3437 }
3438 }
3439}
3440
3441
3444
3445void ResourceManager::dumpMRT() const {
3447 if (UseDFA)
3448 return;
3449 std::stringstream SS;
3450 SS << "MRT:\n";
3451 SS << std::setw(4) << "Slot";
3453 SS << std::setw(3) << I;
3454 SS << std::setw(7) << "#Mops"
3455 << "\n";
3456 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
3457 SS << std::setw(4) << Slot;
3459 SS << std::setw(3) << MRT[Slot][I];
3460 SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
3461 }
3462 dbgs() << SS.str();
3463 });
3464}
3465#endif
3466
3469 unsigned ProcResourceID = 0;
3470
3471
3472
3474 "Too many kinds of resources, unsupported");
3475
3476
3480 if (Desc.SubUnitsIdxBegin)
3481 continue;
3482 Masks[I] = 1ULL << ProcResourceID;
3483 ProcResourceID++;
3484 }
3485
3488 if (.SubUnitsIdxBegin)
3489 continue;
3490 Masks[I] = 1ULL << ProcResourceID;
3491 for (unsigned U = 0; U < Desc.NumUnits; ++U)
3492 Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
3493 ProcResourceID++;
3494 }
3497 dbgs() << "ProcResourceDesc:\n";
3500 dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3501 ProcResource->Name, I, Masks[I],
3503 }
3504 dbgs() << " -----------------\n";
3505 }
3506 });
3507}
3508
3512 dbgs() << "canReserveResources:\n";
3513 });
3514 if (UseDFA)
3515 return DFAResources[positiveModulo(Cycle, InitiationInterval)]
3517
3519 if (!SCDesc->isValid()) {
3521 dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3523 });
3524 return true;
3525 }
3526
3527 reserveResources(SCDesc, Cycle);
3528 bool Result = !isOverbooked();
3529 unreserveResources(SCDesc, Cycle);
3530
3532 return Result;
3533}
3534
3535void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
3538 dbgs() << "reserveResources:\n";
3539 });
3540 if (UseDFA)
3541 return DFAResources[positiveModulo(Cycle, InitiationInterval)]
3543
3545 if (!SCDesc->isValid()) {
3547 dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3549 });
3550 return;
3551 }
3552
3553 reserveResources(SCDesc, Cycle);
3554
3557 dumpMRT();
3558 dbgs() << "reserveResources: done!\n\n";
3559 }
3560 });
3561}
3562
3563void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
3568 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
3569 ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
3570
3572 ++NumScheduledMops[positiveModulo(C, InitiationInterval)];
3573}
3574
3575void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
3580 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
3581 --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
3582
3584 --NumScheduledMops[positiveModulo(C, InitiationInterval)];
3585}
3586
3587bool ResourceManager::isOverbooked() const {
3589 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
3592 if (MRT[Slot][I] > Desc->NumUnits)
3593 return true;
3594 }
3595 if (NumScheduledMops[Slot] > IssueWidth)
3596 return true;
3597 }
3598 return false;
3599}
3600
3601int ResourceManager::calculateResMIIDFA() const {
3603
3604
3605
3606 FuncUnitSorter FUS = FuncUnitSorter(*ST);
3608 FUS.calcCriticalResources(*SU.getInstr());
3610 FuncUnitOrder(FUS);
3611
3613 FuncUnitOrder.push(SU.getInstr());
3614
3618
3619 while (!FuncUnitOrder.empty()) {
3621 FuncUnitOrder.pop();
3623 continue;
3624
3625
3626
3628 unsigned ReservedCycles = 0;
3629 auto *RI = Resources.begin();
3630 auto *RE = Resources.end();
3632 dbgs() << "Trying to reserve resource for " << NumCycles
3633 << " cycles for \n";
3635 });
3636 for (unsigned C = 0; C < NumCycles; ++C)
3637 while (RI != RE) {
3638 if ((*RI)->canReserveResources(*MI)) {
3639 (*RI)->reserveResources(*MI);
3640 ++ReservedCycles;
3641 break;
3642 }
3643 RI++;
3644 }
3645 LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
3646 << ", NumCycles:" << NumCycles << "\n");
3647
3648 for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
3650 << "NewResource created to reserve resources"
3651 << "\n");
3653 assert(NewResource->canReserveResources(*MI) && "Reserve error.");
3654 NewResource->reserveResources(*MI);
3655 Resources.push_back(std::unique_ptr(NewResource));
3656 }
3657 }
3658
3659 int Resmii = Resources.size();
3660 LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
3661 return Resmii;
3662}
3663
3665 if (UseDFA)
3666 return calculateResMIIDFA();
3667
3668
3669
3670
3671 int NumMops = 0;
3675 continue;
3676
3679 continue;
3680
3685 << " WriteProcRes: ";
3686 }
3687 });
3696 dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";
3697 }
3698 });
3699 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
3700 }
3702 }
3703
3704 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3707 dbgs() << "#Mops: " << NumMops << ", "
3708 << "IssueWidth: " << IssueWidth << ", "
3709 << "Cycles: " << Result << "\n";
3710 });
3711
3714 std::stringstream SS;
3715 SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)
3716 << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"
3717 << "\n";
3718 dbgs() << SS.str();
3719 }
3720 });
3723 int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;
3726 std::stringstream SS;
3727 SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
3728 << Desc->NumUnits << std::setw(10) << ResourceCount[I]
3729 << std::setw(10) << Cycles << "\n";
3730 dbgs() << SS.str();
3731 }
3732 });
3733 if (Cycles > Result)
3734 Result = Cycles;
3735 }
3736 return Result;
3737}
3738
3740 InitiationInterval = II;
3741 DFAResources.clear();
3742 DFAResources.resize(II);
3743 for (auto &I : DFAResources)
3744 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3747 NumScheduledMops.clear();
3748 NumScheduledMops.resize(II);
3749}
3750
3752 if (Pred.isArtificial() || Dst->isBoundaryNode())
3753 return true;
3754
3755
3756
3757 return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0);
3758}
3759
3760SwingSchedulerDDG::SwingSchedulerDDGEdges &
3761SwingSchedulerDDG::getEdges(const SUnit *SU) {
3762 if (SU == EntrySU)
3763 return EntrySUEdges;
3764 if (SU == ExitSU)
3765 return ExitSUEdges;
3766 return EdgesVec[SU->NodeNum];
3767}
3768
3769const SwingSchedulerDDG::SwingSchedulerDDGEdges &
3770SwingSchedulerDDG::getEdges(const SUnit *SU) const {
3771 if (SU == EntrySU)
3772 return EntrySUEdges;
3773 if (SU == ExitSU)
3774 return ExitSUEdges;
3775 return EdgesVec[SU->NodeNum];
3776}
3777
3778void SwingSchedulerDDG::addEdge(const SUnit *SU,
3780 auto &Edges = getEdges(SU);
3781 if (Edge.getSrc() == SU)
3782 Edges.Succs.push_back(Edge);
3783 else
3784 Edges.Preds.push_back(Edge);
3785}
3786
3787void SwingSchedulerDDG::initEdges(SUnit *SU) {
3788 for (const auto &PI : SU->Preds) {
3790 addEdge(SU, Edge);
3791 }
3792
3793 for (const auto &SI : SU->Succs) {
3795 addEdge(SU, Edge);
3796 }
3797}
3798
3801 : EntrySU(EntrySU), ExitSU(ExitSU) {
3802 EdgesVec.resize(SUnits.size());
3803
3804 initEdges(EntrySU);
3805 initEdges(ExitSU);
3806 for (auto &SU : SUnits)
3807 initEdges(&SU);
3808}
3809
3812 return getEdges(SU).Preds;
3813}
3814
3817 return getEdges(SU).Succs;
3818}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
SmallVector< uint32_t, 0 > Writes
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
static use_instr_iterator use_instr_end()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void dump() const override
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool isArtificial() const
Returns true if the edge represents an artificial dependence.
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
bool isOrderDep() const
Returns true if the edge represents a dependence that is not data, anti or output dependence.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
bool isOutputDep() const
Returns true if the edge represents output dependence.
Represents dependencies between instructions.
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU)
const EdgesType & getInEdges(const SUnit *SU) const
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool isMVEExpanderSupported()
Return true if the target can expand pipelined schedule with modulo variable expansion.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should used.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
unsigned getPosition() const
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ WS_Force
Use window algorithm after SMS algorithm fails.
@ WS_On
Turn off window algorithm.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
Description of the encoding of one expression Op.
These values represent a non-pipelined step in the execution of an instruction.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
static constexpr LaneBitmask getNone()
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineInstr * LoopInductionVar
SmallVector< MachineOperand, 4 > BrCond
MachineInstr * LoopCompare
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const TargetPassConfig * PassConfig
const MachineDominatorTree * MDT
RegisterClassInfo * RegClassInfo
const MachineLoopInfo * MLI
Store the effects of a change in pressure on things that MI scheduler cares about.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.