LLVM: lib/CodeGen/MachinePipeliner.cpp Source File
72#include "llvm/Config/llvm-config.h"
83#include
84#include
85#include
86#include
87#include
88#include
89#include
90#include
91#include
92#include
93#include
94#include
95#include
96#include
97
98using namespace llvm;
99
100#define DEBUG_TYPE "pipeliner"
101
102STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
103STATISTIC(NumPipelined, "Number of loops software pipelined");
104STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
105STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
106STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
107STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
108STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
109STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
110STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
111STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
112STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
113STATISTIC(NumFailTooManyStores, "Pipeliner abort due to too many stores");
114
115
117 cl::desc("Enable Software Pipelining"));
118
119
123
124
126 cl::desc("Size limit for the MII."),
128
129
130
132 cl::desc("Force pipeliner to use specified II."),
134
135
138 cl::desc("Maximum stages allowed in the generated schedule."),
140
141
142
145 cl::desc("Prune dependences between unrelated Phi nodes."),
147
148
149
152 cl::desc("Prune loop carried order dependences."),
154
155#ifndef NDEBUG
157#endif
158
162
167
170 cl::desc("Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
173
177 "Use the experimental peeling code generator for software pipelining"));
178
180 cl::desc("Range to search for II"),
182
185 cl::desc("Limit register pressure of scheduled loop"));
186
190 cl::desc("Margin representing the unused percentage of "
191 "the register pressure limit"));
192
195 cl::desc("Use the MVE code generator for software pipelining"));
196
197
198
200 "pipeliner-max-num-stores",
201 cl::desc("Maximum number of stores allowed in the target loop."), cl::Hidden,
203
204
208 cl::desc("Enable CopyToPhi DAG Mutation"));
209
210
211
213 "pipeliner-force-issue-width",
214 cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
216
217
220 cl::desc("Set how to use window scheduling algorithm."),
222 "Turn off window algorithm."),
224 "Use window algorithm after SMS algorithm fails."),
226 "Use window algorithm instead of SMS algorithm.")));
227
228unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
230#ifndef NDEBUG
232#endif
234
236 "Modulo Software Pipelining", false, false)
243
244namespace {
245
246
247
251
252
254
255
257
259
260
262
264
266
268
269private:
271};
272
273
274
275
277
278 enum class InstrTag {
279 Barrier = 0,
280 LoadOrStore = 1,
281
282 FPExceptions = 2,
283
284 };
285
287 TaggedSUnit(SUnit *SU, InstrTag Tag)
289
290 InstrTag getTag() const { return InstrTag(getInt()); }
291 };
292
293
294 struct LoadStoreChunk {
297
298 void append(SUnit *SU);
299 };
300
303 std::vector<SUnit> &SUnits;
304
305
306 const unsigned N;
307
308
309 std::vector<BitVector> LoopCarried;
310
311
312
313
314
315
316
317
318
319
320
321
322 std::vector<TaggedSUnit> TaggedSUnits;
323
326
327public:
331
332
334
336 return LoopCarried[Idx];
337 }
338
339private:
340
341 std::optional getInstrTag(SUnit *SU) const;
342
343 void addLoopCarriedDepenenciesForChunks(const LoadStoreChunk &From,
344 const LoadStoreChunk &To);
345
346
347
348
349
350
351
352
355 bool PerformCheapCheck = false);
356
357 void computeDependenciesAux();
358};
359
360}
361
362
365 return false;
366
368 return false;
369
372 return false;
373
375 return false;
376
377
378
382 return false;
383
384 MF = &mf;
388 TII = MF->getSubtarget().getInstrInfo();
390
391 for (const auto &L : *MLI)
392 scheduleLoop(*L);
393
394 return false;
395}
396
397
398
399
400
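// Recursively try to pipeline the inner loops first, then this loop. Returns
// true if any loop in the nest was changed. Pipelining is only attempted when
// canPipelineLoop() succeeds; depending on the options, the SMS scheduler
// and/or the window scheduler is used.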
401bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
403 for (const auto &InnerLoop : L)
404 Changed |= scheduleLoop(*InnerLoop);
405
406#ifndef NDEBUG
407
409 if (Limit >= 0) {
413 }
414#endif
415
416 setPragmaPipelineOptions(L);
417 if (!canPipelineLoop(L)) {
418 LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
421 L.getStartLoc(), L.getHeader())
422 << "Failed to pipeline loop";
423 });
424
425 LI.LoopPipelinerInfo.reset();
427 }
428
429 ++NumTrytoPipeline;
430 if (useSwingModuloScheduler())
431 Changed = swingModuloScheduler(L);
432
433 if (useWindowScheduler(Changed))
434 Changed = runWindowScheduler(L);
435
436 LI.LoopPipelinerInfo.reset();
438}
439
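// Read the llvm.loop metadata attached to the loop's terminator and record the
// pragma-driven options: a forced initiation interval
// (llvm.loop.pipeline.initiationinterval) and the per-loop disable flag
// (llvm.loop.pipeline.disable).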
440void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
441
444
445 MachineBasicBlock *LBLK = L.getTopBlock();
446
447 if (LBLK == nullptr)
448 return;
449
451 if (BBLK == nullptr)
452 return;
453
455 if (TI == nullptr)
456 return;
457
458 MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
459 if (LoopID == nullptr)
460 return;
461
464
467
468 if (MD == nullptr)
469 continue;
470
472
473 if (S == nullptr)
474 continue;
475
476 if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
478 "Pipeline initiation interval hint metadata should have two operands.");
481 assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
482 } else if (S->getString() == "llvm.loop.pipeline.disable") {
484 }
485 }
486}
487
488
489
493
494
495 auto It = PhiDeps.find(Reg);
496 if (It == PhiDeps.end())
497 return false;
498
500 return true;
502 return false;
503
506
507 for (unsigned Dep : It->second) {
509 return true;
510 }
511
513 return false;
514}
515
519
520
522 unsigned DefReg = MI.getOperand(0).getReg();
523 auto Ins = PhiDeps.try_emplace(DefReg).first;
524
525
526 for (unsigned I = 1; I < MI.getNumOperands(); I += 2)
527 Ins->second.push_back(MI.getOperand(I).getReg());
528 }
529
530
532
533
534 for (const auto &KV : PhiDeps) {
535 unsigned Reg = KV.first;
537 return true;
538 }
539
540 return false;
541}
542
543
544
545
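// A loop is a pipelining candidate only if it is a single basic block, has no
// PHI cycles, is not disabled by pragma, has an analyzable branch and loop
// structure, has a preheader, and does not contain too many stores.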
546bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
547 if (L.getNumBlocks() != 1) {
548 ORE->emit([&]() {
549 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
550 L.getStartLoc(), L.getHeader())
551 << "Not a single basic block: "
552 << ore::NV("NumBlocks", L.getNumBlocks());
553 });
554 return false;
555 }
556
558 LLVM_DEBUG(dbgs() << "Cannot pipeline loop due to PHI cycle\n");
559 return false;
560 }
561
563 ORE->emit([&]() {
564 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
565 L.getStartLoc(), L.getHeader())
566 << "Disabled by Pragma.";
567 });
568 return false;
569 }
570
571
572
573 LI.TBB = nullptr;
574 LI.FBB = nullptr;
575 LI.BrCond.clear();
576 if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
577 LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
578 NumFailBranch++;
579 ORE->emit([&]() {
580 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
581 L.getStartLoc(), L.getHeader())
582 << "The branch can't be understood";
583 });
584 return false;
585 }
586
587 LI.LoopInductionVar = nullptr;
588 LI.LoopCompare = nullptr;
589 LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
590 if (!LI.LoopPipelinerInfo) {
591 LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
592 NumFailLoop++;
593 ORE->emit([&]() {
594 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
595 L.getStartLoc(), L.getHeader())
596 << "The loop structure is not supported";
597 });
598 return false;
599 }
600
601 if (!L.getLoopPreheader()) {
602 LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
603 NumFailPreheader++;
604 ORE->emit([&]() {
605 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
606 L.getStartLoc(), L.getHeader())
607 << "No loop preheader found";
608 });
609 return false;
610 }
611
612 unsigned NumStores = 0;
613 for (MachineInstr &MI : *L.getHeader())
614 if (MI.mayStore())
615 ++NumStores;
618 NumFailTooManyStores++;
619 ORE->emit([&]() {
620 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
621 L.getStartLoc(), L.getHeader())
622 << "Too many store instructions in the loop: "
623 << ore::NV("NumStores", NumStores) << " > "
625 });
626 return false;
627 }
628
629
630 preprocessPhiNodes(*L.getHeader());
631 return true;
632}
633
635 MachineRegisterInfo &MRI = MF->getRegInfo();
636 SlotIndexes &Slots =
638
639 for (MachineInstr &PI : B.phis()) {
640 MachineOperand &DefOp = PI.getOperand(0);
642 auto *RC = MRI.getRegClass(DefOp.getReg());
643
644 for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
645 MachineOperand &RegOp = PI.getOperand(i);
647 continue;
648
649
650
651 Register NewReg = MRI.createVirtualRegister(RC);
652 MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
655 auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
659 RegOp.setReg(NewReg);
661 }
662 }
663}
664
665
666
667
668
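// Run the Swing Modulo Scheduler on the single-block loop: build a
// SwingSchedulerDAG over the block, schedule the region, and report whether a
// new (pipelined) schedule was produced.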
669bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
670 assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
671
673 SwingSchedulerDAG SMS(
676
677 MachineBasicBlock *MBB = L.getHeader();
678
679
680 SMS.startBlock(MBB);
681
682
683
688 ;
689
691 SMS.schedule();
692 SMS.exitRegion();
693
694 SMS.finishBlock();
695 return SMS.hasNewSchedule();
696}
697
708
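// Alternative path: run the window scheduling algorithm on the loop using the
// current pass context (loop info, dominator tree, register class info).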
709bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
712 Context.MLI = MLI;
713 Context.MDT = MDT;
717 Context.RegClassInfo->runOnMachineFunction(*MF);
719 return WS.run();
720}
721
722bool MachinePipeliner::useSwingModuloScheduler() {
723
725}
726
727bool MachinePipeliner::useWindowScheduler(bool Changed) {
728
729
730
731
733 LLVM_DEBUG(dbgs() << "Window scheduling is disabled when "
734 "llvm.loop.pipeline.initiationinterval is set.\n");
735 return false;
736 }
737
740}
741
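// Pick the minimum initiation interval. A forced II (from the command line
// option or from pragma metadata) wins; otherwise MII = max(ResMII, RecMII).
// For example, ResMII = 2 and RecMII = 3 give MII = 3.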
742void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
745 else if (II_setByPragma > 0)
746 MII = II_setByPragma;
747 else
748 MII = std::max(ResMII, RecMII);
749}
750
751void SwingSchedulerDAG::setMAX_II() {
754 else if (II_setByPragma > 0)
755 MAX_II = II_setByPragma;
756 else
758}
759
760
761
762void SwingSchedulerDAG::schedule() {
765 updatePhiDependences();
766 Topo.InitDAGTopologicalSorting();
767 changeDependences();
768 postProcessDAG();
769 DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU, LCE);
772 dbgs() << "===== Loop Carried Edges Begin =====\n";
775 dbgs() << "===== Loop Carried Edges End =====\n";
776 });
777
778 NodeSetType NodeSets;
779 findCircuits(NodeSets);
780 NodeSetType Circuits = NodeSets;
781
782
783 unsigned ResMII = calculateResMII();
784 unsigned RecMII = calculateRecMII(NodeSets);
785
786 fuseRecs(NodeSets);
787
788
790 RecMII = 0;
791
792 setMII(ResMII, RecMII);
793 setMAX_II();
794
795 LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
796 << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
797
798
799 if (MII == 0) {
800 LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
801 NumFailZeroMII++;
802 Pass.ORE->emit([&]() {
804 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
805 << "Invalid Minimal Initiation Interval: 0";
806 });
807 return;
808 }
809
810
813 << ", we don't pipeline large loops\n");
814 NumFailLargeMaxMII++;
815 Pass.ORE->emit([&]() {
817 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
818 << "Minimal Initiation Interval too large: "
819 << ore::NV("MII", (int)MII) << " > "
821 << "Refer to -pipeliner-max-mii.";
822 });
823 return;
824 }
825
826 computeNodeFunctions(NodeSets);
827
828 registerPressureFilter(NodeSets);
829
830 colocateNodeSets(NodeSets);
831
832 checkNodeSets(NodeSets);
833
835 for (auto &I : NodeSets) {
836 dbgs() << " Rec NodeSet ";
837 I.dump();
838 }
839 });
840
842
843 groupRemainingNodes(NodeSets);
844
845 removeDuplicateNodes(NodeSets);
846
848 for (auto &I : NodeSets) {
849 dbgs() << " NodeSet ";
850 I.dump();
851 }
852 });
853
854 computeNodeOrder(NodeSets);
855
856
857 checkValidNodeOrder(Circuits);
858
860 Scheduled = schedulePipeline(Schedule);
861
862 if (!Scheduled) {
864 NumFailNoSchedule++;
865 Pass.ORE->emit([&]() {
867 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
868 << "Unable to find schedule";
869 });
870 return;
871 }
872
874
875 if (numStages == 0) {
876 LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
877 NumFailZeroStage++;
878 Pass.ORE->emit([&]() {
880 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
881 << "No need to pipeline - no overlapped iterations in schedule.";
882 });
883 return;
884 }
885
888 << " : too many stages, abort\n");
889 NumFailLargeMaxStage++;
890 Pass.ORE->emit([&]() {
892 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
893 << "Too many stages in schedule: "
894 << ore::NV("numStages", (int)numStages) << " > "
896 << ". Refer to -pipeliner-max-stages.";
897 });
898 return;
899 }
900
901 Pass.ORE->emit([&]() {
903 Loop.getHeader())
904 << "Pipelined succesfully!";
905 });
906
907
909 std::vector<MachineInstr *> OrderedInsts;
913 OrderedInsts.push_back(SU->getInstr());
914 Cycles[SU->getInstr()] = Cycle;
915 Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
916 }
917 }
919 for (auto &KV : NewMIs) {
920 Cycles[KV.first] = Cycles[KV.second];
921 Stages[KV.first] = Stages[KV.second];
922 NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
923 }
924
925 ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
926 std::move(Stages));
929 "Cannot serialize a schedule with InstrChanges!");
932 return;
933 }
934
939 LoopPipelinerInfo->isMVEExpanderSupported() &&
943 } else {
947 }
948 ++NumPipelined;
949}
950
951
953 for (auto &KV : NewMIs)
954 MF.deleteMachineInstr(KV.second);
955 NewMIs.clear();
956
957
959}
960
961
962
965 assert(Phi.isPHI() && "Expecting a Phi.");
966
969 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
970 if (Phi.getOperand(i + 1).getMBB() != Loop)
971 InitVal = Phi.getOperand(i).getReg();
972 else
973 LoopVal = Phi.getOperand(i).getReg();
974
975 assert(InitVal && LoopVal && "Unexpected Phi structure.");
976}
977
978
981 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
982 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
983 return Phi.getOperand(i).getReg();
985}
986
987
992 while (!Worklist.empty()) {
994 for (const auto &SI : SU->Succs) {
995 SUnit *SuccSU = SI.getSUnit();
997 if (Visited.count(SuccSU))
998 continue;
999 if (SuccSU == SUb)
1000 return true;
1002 Visited.insert(SuccSU);
1003 }
1004 }
1005 }
1006 return false;
1007}
1008
1010 if (!getUnderlyingObjects())
1011 return;
1015 break;
1016 }
1017}
1018
1021
1022
1024 return false;
1027 return false;
1028 return true;
1029}
1030
1031
1032
1033
1034
1035bool SUnitWithMemInfo::getUnderlyingObjects() {
1037 if (->hasOneMemOperand())
1038 return false;
1041 return false;
1045
1046
1047
1049 return true;
1050}
1051
1052
1053
1054static bool
1059 if (Src.isTriviallyDisjoint(Dst))
1060 return false;
1062 return false;
1063
1066 if (PerformCheapCheck) {
1067
1068
1069
1070
1071
1073 int64_t Offset1, Offset2;
1074 bool Offset1IsScalable, Offset2IsScalable;
1075 if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
1077 TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
1080 Offset1IsScalable == Offset2IsScalable &&
1081 (int)Offset1 < (int)Offset2) {
1082 assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
1083 "What happened to the chain edge?");
1084 return true;
1085 }
1086 }
1087 }
1088
1090 return false;
1091
1092
1093
1094
1095 if (Src.isUnknown() || Dst.isUnknown())
1096 return true;
1097 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)
1098 return true;
1099
1103 return false;
1104
1105
1106
1107
1108 for (const Value *SrcObj : Src.UnderlyingObjs)
1109 for (const Value *DstObj : Dst.UnderlyingObjs)
1112 return true;
1113
1114 return false;
1115}
1116
1117void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
1118 const MachineInstr *MI = SU->getInstr();
1119 if (!MI->mayLoadOrStore())
1120 return;
1121 (MI->mayStore() ? Stores : Loads).emplace_back(SU);
1122}
1123
1127 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.size()),
1128 LoopCarried(N, BitVector(N)), TII(TII), TRI(TRI) {}
1129
1131
1132 for (auto &SU : SUnits) {
1133 auto Tagged = getInstrTag(&SU);
1134
1135
1136 if (!Tagged)
1137 continue;
1138 TaggedSUnits.emplace_back(&SU, *Tagged);
1139 }
1140
1141 computeDependenciesAux();
1142}
1143
1144std::optionalLoopCarriedOrderDepsTracker::InstrTag
1145LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {
1147 if (TII->isGlobalMemoryObject(MI))
1148 return InstrTag::Barrier;
1149
1150 if (MI->mayStore() ||
1151 (MI->mayLoad() && !MI->isDereferenceableInvariantLoad()))
1152 return InstrTag::LoadOrStore;
1153
1154 if (MI->mayRaiseFPException())
1155 return InstrTag::FPExceptions;
1156
1157 return std::nullopt;
1158}
1159
1160void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
1161 const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
1162 bool PerformCheapCheck) {
1163
1164 if (Src.SU == Dst.SU)
1165 return;
1166
1168 LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
1169}
1170
1171void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
1172 const LoadStoreChunk &From, const LoadStoreChunk &To) {
1173
1174 for (const SUnitWithMemInfo &Src : From.Loads)
1175 for (const SUnitWithMemInfo &Dst : To.Stores)
1176
1177 addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
1178
1179
1180 for (const SUnitWithMemInfo &Src : From.Stores)
1181 for (const SUnitWithMemInfo &Dst : To.Loads)
1182 addDependenciesBetweenSUs(Src, Dst);
1183
1184
1185 for (const SUnitWithMemInfo &Src : From.Stores)
1186 for (const SUnitWithMemInfo &Dst : To.Stores)
1187 addDependenciesBetweenSUs(Src, Dst);
1188}
1189
1190void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
1192 for (const auto &TSU : TaggedSUnits) {
1193 InstrTag Tag = TSU.getTag();
1194 SUnit *SU = TSU.getPointer();
1195 switch (Tag) {
1196 case InstrTag::Barrier:
1197 Chunks.emplace_back();
1198 break;
1199 case InstrTag::LoadOrStore:
1200 Chunks.back().append(SU);
1201 break;
1202 case InstrTag::FPExceptions:
1203
1204 break;
1205 }
1206 }
1207
1208
1209
1210
1211 for (const LoadStoreChunk &Chunk : Chunks)
1212 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
1213
1214
1215
1216
1217}
1218
1219
1220
1221
1222
1223
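// Collect loop-carried order dependences between memory operations and other
// instructions that cannot be reordered across iterations, using the
// LoopCarriedOrderDepsTracker, and return them as LoopCarriedEdges.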
1224LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
1225 LoopCarriedEdges LCE;
1226
1227
1229 LCODTracker.computeDependencies();
1230 for (unsigned I = 0; I != SUnits.size(); I++)
1231 for (const int Succ : LCODTracker.getLoopCarried(I).set_bits())
1233
1235 return LCE;
1236}
1237
1238
1239
1240
1241
1242
1243
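// Add or adjust anti and data dependences involving PHI nodes so that values
// that are live across the loop back-edge are modeled correctly, and remove
// dependences that are not needed for pipelining.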
1244void SwingSchedulerDAG::updatePhiDependences() {
1246 const TargetSubtargetInfo &ST = MF.getSubtarget();
1247
1248
1249 for (SUnit &I : SUnits) {
1250 RemoveDeps.clear();
1251
1254 MachineInstr *MI = I.getInstr();
1255
1256 for (const MachineOperand &MO : MI->operands()) {
1257 if (!MO.isReg())
1258 continue;
1260 if (MO.isDef()) {
1261
1263 UI = MRI.use_instr_begin(Reg),
1264 UE = MRI.use_instr_end();
1265 UI != UE; ++UI) {
1266 MachineInstr *UseMI = &*UI;
1267 SUnit *SU = getSUnit(UseMI);
1268 if (SU != nullptr && UseMI->isPHI()) {
1269 if (!MI->isPHI()) {
1271 Dep.setLatency(1);
1272 I.addPred(Dep);
1273 } else {
1274 HasPhiDef = Reg;
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1290 }
1291 }
1292 }
1293 } else if (MO.isUse()) {
1294
1295 MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
1296 if (DefMI == nullptr)
1297 continue;
1298 SUnit *SU = getSUnit(DefMI);
1299 if (SU != nullptr && DefMI->isPHI()) {
1300 if (!MI->isPHI()) {
1302 Dep.setLatency(0);
1303 ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,
1304 &SchedModel);
1305 I.addPred(Dep);
1306 } else {
1307 HasPhiUse = Reg;
1308
1309
1310 if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1312 }
1313 }
1314 }
1315 }
1316
1318 continue;
1319 for (auto &PI : I.Preds) {
1320 MachineInstr *PMI = PI.getSUnit()->getInstr();
1322 if (I.getInstr()->isPHI()) {
1324 continue;
1326 continue;
1327 }
1329 }
1330 }
1331 for (const SDep &D : RemoveDeps)
1333 }
1334}
1335
1336
1337
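// If a load can use the already-incremented base register from the previous
// iteration (see canUseLastOffsetValue), rewire its dependences so that it no
// longer waits on the base update in the current iteration, and remember the
// new base/offset in InstrChanges.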
1338void SwingSchedulerDAG::changeDependences() {
1339
1340
1341
1342 for (SUnit &I : SUnits) {
1343 unsigned BasePos = 0, OffsetPos = 0;
1345 int64_t NewOffset = 0;
1346 if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1347 NewOffset))
1348 continue;
1349
1350
1351 Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1352 MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
1354 continue;
1355 SUnit *DefSU = getSUnit(DefMI);
1356 if (!DefSU)
1357 continue;
1358
1359 MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
1360 if (!LastMI)
1361 continue;
1362 SUnit *LastSU = getSUnit(LastMI);
1363 if (!LastSU)
1364 continue;
1365
1366 if (Topo.IsReachable(&I, LastSU))
1367 continue;
1368
1369
1371 for (const SDep &P : I.Preds)
1372 if (P.getSUnit() == DefSU)
1374 for (const SDep &D : Deps) {
1375 Topo.RemovePred(&I, D.getSUnit());
1377 }
1378
1379 Deps.clear();
1380 for (auto &P : LastSU->Preds)
1381 if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1382 Deps.push_back(P);
1383 for (const SDep &D : Deps) {
1384 Topo.RemovePred(LastSU, D.getSUnit());
1386 }
1387
1388
1389
1391 Topo.AddPred(LastSU, &I);
1393
1394
1395
1396 InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1397 }
1398}
1399
1400
1401
1402
1403
1404
1407 std::vector<MachineInstr *> &OrderedInsts,
1410
1411
1414 for (int Stage = 0, LastStage = Schedule.getMaxStageCount();
1415 Stage <= LastStage; ++Stage) {
1418 Instrs[Cycle].push_front(SU);
1419 }
1420 }
1421 }
1422
1425 std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];
1427 for (SUnit *SU : CycleInstrs) {
1429 OrderedInsts.push_back(MI);
1431 }
1432 }
1433}
1434
1435namespace {
1436
1437
1438
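// Comparator used when computing ResMII: orders instructions by the number of
// functional-unit alternatives they have and, on a tie, by how heavily the
// critical resource is used.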
1439struct FuncUnitSorter {
1440 const InstrItineraryData *InstrItins;
1441 const MCSubtargetInfo *STI;
1442 DenseMap<InstrStage::FuncUnits, unsigned> Resources;
1443
1444 FuncUnitSorter(const TargetSubtargetInfo &TSI)
1445 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1446
1447
1448
1449
1450 unsigned minFuncUnits(const MachineInstr *Inst,
1453 unsigned min = UINT_MAX;
1454 if (InstrItins && !InstrItins->isEmpty()) {
1455 for (const InstrStage &IS :
1457 InstrItins->endStage(SchedClass))) {
1459 unsigned numAlternatives = llvm::popcount(funcUnits);
1460 if (numAlternatives < min) {
1461 min = numAlternatives;
1462 F = funcUnits;
1463 }
1464 }
1465 return min;
1466 }
1468 const MCSchedClassDesc *SCDesc =
1471
1472
1473 return min;
1474
1475 for (const MCWriteProcResEntry &PRE :
1478 if (!PRE.ReleaseAtCycle)
1479 continue;
1480 const MCProcResourceDesc *ProcResource =
1482 unsigned NumUnits = ProcResource->NumUnits;
1483 if (NumUnits < min) {
1484 min = NumUnits;
1485 F = PRE.ProcResourceIdx;
1486 }
1487 }
1488 return min;
1489 }
1490 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1491 }
1492
1493
1494
1495
1496
1497
1498 void calcCriticalResources(MachineInstr &MI) {
1499 unsigned SchedClass = MI.getDesc().getSchedClass();
1500 if (InstrItins && !InstrItins->isEmpty()) {
1501 for (const InstrStage &IS :
1503 InstrItins->endStage(SchedClass))) {
1506 Resources[FuncUnits]++;
1507 }
1508 return;
1509 }
1511 const MCSchedClassDesc *SCDesc =
1514
1515
1516 return;
1517
1518 for (const MCWriteProcResEntry &PRE :
1521 if (!PRE.ReleaseAtCycle)
1522 continue;
1523 Resources[PRE.ProcResourceIdx]++;
1524 }
1525 return;
1526 }
1527 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1528 }
1529
1530
1531 bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1533 unsigned MFUs1 = minFuncUnits(IS1, F1);
1534 unsigned MFUs2 = minFuncUnits(IS2, F2);
1535 if (MFUs1 == MFUs2)
1536 return Resources.lookup(F1) < Resources.lookup(F2);
1537 return MFUs1 > MFUs2;
1538 }
1539};
1540
1541
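// Estimates the maximum register pressure of a proposed modulo schedule by
// replaying the scheduled instructions stage by stage, and flags schedules
// whose pressure exceeds the target's limit (minus a configurable margin) so
// they can be rejected before code generation.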
1542class HighRegisterPressureDetector {
1543 MachineBasicBlock *OrigMBB;
1544 const MachineRegisterInfo &MRI;
1545 const TargetRegisterInfo *TRI;
1546
1547 const unsigned PSetNum;
1548
1549
1550
1551
1552
1553 std::vector<unsigned> InitSetPressure;
1554
1555
1556
1557 std::vector<unsigned> PressureSetLimit;
1558
1559 DenseMap<MachineInstr *, RegisterOperands> ROMap;
1560
1561 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
1562
1563public:
1564 using OrderedInstsTy = std::vector<MachineInstr *>;
1565 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;
1566
1567private:
1568 static void dumpRegisterPressures(const std::vector<unsigned> &Pressures) {
1569 if (Pressures.size() == 0) {
1570 dbgs() << "[]";
1571 } else {
1573 for (unsigned P : Pressures) {
1576 }
1577 dbgs() << ']';
1578 }
1579 }
1580
1583
1584 VirtRegOrUnit VRegOrUnit =
1585 Reg.isVirtual() ? VirtRegOrUnit(Reg)
1586 : VirtRegOrUnit(static_cast<MCRegUnit>(Reg.id()));
1587 for (auto PSetIter = MRI.getPressureSets(VRegOrUnit); PSetIter.isValid();
1588 ++PSetIter) {
1589 dbgs() << *PSetIter << ' ';
1590 }
1591 dbgs() << '\n';
1592 }
1593
1594 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1596
1597 VirtRegOrUnit VRegOrUnit =
1598 Reg.isVirtual() ? VirtRegOrUnit(Reg)
1599 : VirtRegOrUnit(static_cast<MCRegUnit>(Reg.id()));
1600 auto PSetIter = MRI.getPressureSets(VRegOrUnit);
1601 unsigned Weight = PSetIter.getWeight();
1602 for (; PSetIter.isValid(); ++PSetIter)
1603 Pressure[*PSetIter] += Weight;
1604 }
1605
1606 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1608 auto PSetIter = MRI.getPressureSets(VirtRegOrUnit(Reg));
1609 unsigned Weight = PSetIter.getWeight();
1610 for (; PSetIter.isValid(); ++PSetIter) {
1611 auto &P = Pressure[*PSetIter];
1613 "register pressure must be greater than or equal weight");
1614 P -= Weight;
1615 }
1616 }
1617
1618
1619 bool isReservedRegister(Register Reg) const {
1621 }
1622
1623 bool isDefinedInThisLoop(Register Reg) const {
1624 return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;
1625 }
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635 void computeLiveIn() {
1636 DenseSet<Register> Used;
1637 for (auto &MI : *OrigMBB) {
1638 if (MI.isDebugInstr())
1639 continue;
1640 for (auto &Use : ROMap[&MI].Uses) {
1641
1643 Use.VRegOrUnit.isVirtualReg()
1644 ? Use.VRegOrUnit.asVirtualReg()
1645 : Register(static_cast<unsigned>(Use.VRegOrUnit.asMCRegUnit()));
1646
1647
1649 continue;
1650 if (isReservedRegister(Reg))
1651 continue;
1652 if (isDefinedInThisLoop(Reg))
1653 continue;
1655 }
1656 }
1657
1658 for (auto LiveIn : Used)
1659 increaseRegisterPressure(InitSetPressure, LiveIn);
1660 }
1661
1662
1663 void computePressureSetLimit(const RegisterClassInfo &RCI) {
1664 for (unsigned PSet = 0; PSet < PSetNum; PSet++)
1666 }
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679 Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,
1680 Instr2StageTy &Stages) const {
1681
1682
1683
1684
1685 DenseSet<Register> TargetRegs;
1686 const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {
1687 if (isDefinedInThisLoop(Reg))
1689 };
1690 for (MachineInstr *MI : OrderedInsts) {
1691 if (MI->isPHI()) {
1693 UpdateTargetRegs(Reg);
1694 } else {
1695 for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
1696
1698 ? Use.VRegOrUnit.asVirtualReg()
1699 : Register(static_cast<unsigned>(
1700 Use.VRegOrUnit.asMCRegUnit()));
1701 UpdateTargetRegs(Reg);
1702 }
1703 }
1704 }
1705
1706 const auto InstrScore = [&Stages](MachineInstr *MI) {
1707 return Stages[MI] + MI->isPHI();
1708 };
1709
1710 DenseMap<Register, MachineInstr *> LastUseMI;
1712 for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
1713
1715 Use.VRegOrUnit.isVirtualReg()
1716 ? Use.VRegOrUnit.asVirtualReg()
1717 : Register(static_cast<unsigned>(Use.VRegOrUnit.asMCRegUnit()));
1719 continue;
1721 if (!Inserted) {
1722 MachineInstr *Orig = Ite->second;
1724 if (InstrScore(Orig) < InstrScore(New))
1725 Ite->second = New;
1726 }
1727 }
1728 }
1729
1730 Instr2LastUsesTy LastUses;
1731 for (auto [Reg, MI] : LastUseMI)
1732 LastUses[MI].insert(Reg);
1733 return LastUses;
1734 }
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748 std::vector<unsigned>
1749 computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,
1750 Instr2StageTy &Stages,
1751 const unsigned StageCount) const {
1752 using RegSetTy = SmallDenseSet<Register, 16>;
1753
1754
1755
1757
1758 auto CurSetPressure = InitSetPressure;
1759 auto MaxSetPressure = InitSetPressure;
1760 auto LastUses = computeLastUses(OrderedInsts, Stages);
1761
1763 dbgs() << "Ordered instructions:\n";
1764 for (MachineInstr *MI : OrderedInsts) {
1765 dbgs() << "Stage " << Stages[MI] << ": ";
1767 }
1768 });
1769
1770 const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,
1771 VirtRegOrUnit VRegOrUnit) {
1772
1778 return;
1779
1780 bool Inserted = RegSet.insert(Reg).second;
1781 if (!Inserted)
1782 return;
1783
1785 increaseRegisterPressure(CurSetPressure, Reg);
1787 };
1788
1789 const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,
1792 return;
1793
1794
1795 if (!RegSet.contains(Reg))
1796 return;
1797
1799 RegSet.erase(Reg);
1800 decreaseRegisterPressure(CurSetPressure, Reg);
1802 };
1803
1804 for (unsigned I = 0; I < StageCount; I++) {
1805 for (MachineInstr *MI : OrderedInsts) {
1806 const auto Stage = Stages[MI];
1807 if (I < Stage)
1808 continue;
1809
1810 const unsigned Iter = I - Stage;
1811
1812 for (auto &Def : ROMap.find(MI)->getSecond().Defs)
1813 InsertReg(LiveRegSets[Iter], Def.VRegOrUnit);
1814
1815 for (auto LastUse : LastUses[MI]) {
1816 if (MI->isPHI()) {
1817 if (Iter != 0)
1818 EraseReg(LiveRegSets[Iter - 1], LastUse);
1819 } else {
1820 EraseReg(LiveRegSets[Iter], LastUse);
1821 }
1822 }
1823
1824 for (unsigned PSet = 0; PSet < PSetNum; PSet++)
1825 MaxSetPressure[PSet] =
1826 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1827
1829 dbgs() << "CurSetPressure=";
1830 dumpRegisterPressures(CurSetPressure);
1831 dbgs() << " iter=" << Iter << " stage=" << Stage << ":";
1833 });
1834 }
1835 }
1836
1837 return MaxSetPressure;
1838 }
1839
1840public:
1841 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1842 const MachineFunction &MF)
1843 : OrigMBB(OrigMBB), MRI(MF.getRegInfo()),
1844 TRI(MF.getSubtarget().getRegisterInfo()),
1845 PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1846 PressureSetLimit(PSetNum, 0) {}
1847
1848
1849
1850 void init(const RegisterClassInfo &RCI) {
1851 for (MachineInstr &MI : *OrigMBB) {
1852 if (MI.isDebugInstr())
1853 continue;
1854 ROMap[&MI].collect(MI, *TRI, MRI, false, true);
1855 }
1856
1857 computeLiveIn();
1858 computePressureSetLimit(RCI);
1859 }
1860
1861
1862
1863 bool detect(const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1864 const unsigned MaxStage) const {
1866 "the percentage of the margin must be between 0 to 100");
1867
1868 OrderedInstsTy OrderedInsts;
1869 Instr2StageTy Stages;
1871 const auto MaxSetPressure =
1872 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1873
1875 dbgs() << "Dump MaxSetPressure:\n";
1876 for (unsigned I = 0; I < MaxSetPressure.size(); I++) {
1877 dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);
1878 }
1879 dbgs() << '\n';
1880 });
1881
1882 for (unsigned PSet = 0; PSet < PSetNum; PSet++) {
1883 unsigned Limit = PressureSetLimit[PSet];
1885 LLVM_DEBUG(dbgs() << "PSet=" << PSet << " Limit=" << Limit
1886 << " Margin=" << Margin << "\n");
1887 if (Limit < MaxSetPressure[PSet] + Margin) {
1890 << "Rejected the schedule because of too high register pressure\n");
1891 return true;
1892 }
1893 }
1894 return false;
1895 }
1896};
1897
1898}
1899
1900
1901
1902
1903
1904
1905
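// Resource-constrained MII: delegate to the ResourceManager, which estimates
// how many cycles are needed so that every instruction in the loop body can
// be issued on some functional unit.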
1906unsigned SwingSchedulerDAG::calculateResMII() {
1908 ResourceManager RM(&MF.getSubtarget(), this);
1909 return RM.calculateResMII();
1910}
1911
1912
1913
1914
1915
1916
1917
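// Recurrence-constrained MII: for each recurrence (node set), take
// ceil(Delay / Distance); the overall RecMII is the maximum over all
// recurrences. The code below currently assumes Distance == 1, so a
// recurrence with total latency 3 contributes CurMII = (3 + 1 - 1) / 1 = 3.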
1918unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1919 unsigned RecMII = 0;
1920
1921 for (NodeSet &Nodes : NodeSets) {
1922 if (Nodes.empty())
1923 continue;
1924
1925 unsigned Delay = Nodes.getLatency();
1926 unsigned Distance = 1;
1927
1928
1929 unsigned CurMII = (Delay + Distance - 1) / Distance;
1930 Nodes.setRecMII(CurMII);
1931 if (CurMII > RecMII)
1932 RecMII = CurMII;
1933 }
1934
1935 return RecMII;
1936}
1937
1938
1939void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1940 SwingSchedulerDAG *DAG) {
1941 BitVector Added(SUnits.size());
1942 DenseMap<int, int> OutputDeps;
1943 for (int i = 0, e = SUnits.size(); i != e; ++i) {
1945
1946 for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1947
1948
1949 if (OE.isOutputDep()) {
1950 int N = OE.getDst()->NodeNum;
1951 int BackEdge = i;
1952 auto Dep = OutputDeps.find(BackEdge);
1953 if (Dep != OutputDeps.end()) {
1954 BackEdge = Dep->second;
1955 OutputDeps.erase(Dep);
1956 }
1957 OutputDeps[N] = BackEdge;
1958 }
1959
1960 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1961 continue;
1962
1963
1964
1965
1966
1967
1968
1969 if (OE.isAntiDep())
1970 continue;
1971
1972 int N = OE.getDst()->NodeNum;
1973 if (!Added.test(N)) {
1974 AdjK[i].push_back(N);
1975 Added.set(N);
1976 }
1977 }
1978
1979
1980 for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1981 SUnit *Src = IE.getSrc();
1982 SUnit *Dst = IE.getDst();
1983 if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE))
1984 continue;
1985 if (IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1986 int N = Src->NodeNum;
1987 if (!Added.test(N)) {
1988 AdjK[i].push_back(N);
1989 Added.set(N);
1990 }
1991 }
1992 }
1993 }
1994
1995 for (auto &OD : OutputDeps)
1996 if (!Added.test(OD.second)) {
1997 AdjK[OD.first].push_back(OD.second);
1998 Added.set(OD.second);
1999 }
2000}
2001
2002
2003
2004bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
2005 const SwingSchedulerDAG *DAG,
2006 bool HasBackedge) {
2007 SUnit *SV = &SUnits[V];
2008 bool F = false;
2009 Stack.insert(SV);
2010 Blocked.set(V);
2011
2012 for (auto W : AdjK[V]) {
2013 if (NumPaths > MaxPaths)
2014 break;
2015 if (W < S)
2016 continue;
2017 if (W == S) {
2018 if (!HasBackedge)
2020 F = true;
2021 ++NumPaths;
2022 break;
2023 }
2024 if (!Blocked.test(W)) {
2025 if (circuit(W, S, NodeSets, DAG,
2026 Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
2027 F = true;
2028 }
2029 }
2030
2031 if (F)
2032 unblock(V);
2033 else {
2034 for (auto W : AdjK[V]) {
2035 if (W < S)
2036 continue;
2038 }
2039 }
2040 Stack.pop_back();
2041 return F;
2042}
2043
2044
2045void SwingSchedulerDAG::Circuits::unblock(int U) {
2046 Blocked.reset(U);
2047 SmallPtrSet<SUnit *, 4> &BU = B[U];
2048 while (!BU.empty()) {
2049 SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
2050 assert(SI != BU.end() && "Invalid B set.");
2051 SUnit *W = *SI;
2052 BU.erase(W);
2053 if (Blocked.test(W->NodeNum))
2054 unblock(W->NodeNum);
2055 }
2056}
2057
2058
2059
2060void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
2061 Circuits Cir(SUnits, Topo);
2062
2063 Cir.createAdjacencyStructure(this);
2064 for (int I = 0, E = SUnits.size(); I != E; ++I) {
2065 Cir.reset();
2066 Cir.circuit(I, I, NodeSets, this);
2067 }
2068}
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
2089 for (SUnit &SU : DAG->SUnits) {
2090
2092 continue;
2093
2094
2096
2098
2099 for (auto &Dep : SU.Preds) {
2100 SUnit *TmpSU = Dep.getSUnit();
2101 MachineInstr *TmpMI = TmpSU->getInstr();
2103
2106
2107
2110 }
2111
2112 if (PHISUs.size() == 0 || SrcSUs.size() == 0)
2113 continue;
2114
2115
2116
2118
2119 for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
2120 for (auto &Dep : PHISUs[Index]->Succs) {
2122 continue;
2123
2124 SUnit *TmpSU = Dep.getSUnit();
2125 MachineInstr *TmpMI = TmpSU->getInstr();
2128 continue;
2129 }
2131 }
2132 }
2133
2134 if (UseSUs.size() == 0)
2135 continue;
2136
2138
2139 for (auto *I : UseSUs) {
2140 for (auto *Src : SrcSUs) {
2141 if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
2144 }
2145 }
2146 }
2147 }
2148}
2149
2150
2151
2152
2153
2154
2155
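// Compute per-node scheduling attributes over the dependence graph in
// topological order: ASAP (earliest start), ALAP (latest start), and the
// zero-latency depth/height, which together drive mobility (MOV) and the node
// ordering heuristics.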
2156void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2157 ScheduleInfo.resize(SUnits.size());
2158
2160 for (int I : Topo) {
2161 const SUnit &SU = SUnits[I];
2162 dumpNode(SU);
2163 }
2164 });
2165
2166 int maxASAP = 0;
2167
2168 for (int I : Topo) {
2169 int asap = 0;
2170 int zeroLatencyDepth = 0;
2171 SUnit *SU = &SUnits[I];
2172 for (const auto &IE : DDG->getInEdges(SU)) {
2173 SUnit *Pred = IE.getSrc();
2174 if (IE.getLatency() == 0)
2175 zeroLatencyDepth =
2176 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2177 if (IE.ignoreDependence(true))
2178 continue;
2179 asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() -
2180 IE.getDistance() * MII));
2181 }
2182 maxASAP = std::max(maxASAP, asap);
2183 ScheduleInfo[I].ASAP = asap;
2184 ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
2185 }
2186
2187
2189 int alap = maxASAP;
2190 int zeroLatencyHeight = 0;
2191 SUnit *SU = &SUnits[I];
2192 for (const auto &OE : DDG->getOutEdges(SU)) {
2193 SUnit *Succ = OE.getDst();
2195 continue;
2196 if (OE.getLatency() == 0)
2197 zeroLatencyHeight =
2198 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2199 if (OE.ignoreDependence(true))
2200 continue;
2201 alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() +
2202 OE.getDistance() * MII));
2203 }
2204
2205 ScheduleInfo[I].ALAP = alap;
2206 ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
2207 }
2208
2209
2210 for (NodeSet &I : NodeSets)
2211 I.computeNodeSetInfo(this);
2212
2214 for (unsigned i = 0; i < SUnits.size(); i++) {
2215 dbgs() << "\tNode " << i << ":\n";
2216 dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
2217 dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
2218 dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
2219 dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
2220 dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
2221 dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
2222 dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
2223 }
2224 });
2225}
2226
2227
2228
2229
2232 const NodeSet *S = nullptr) {
2234
2236 for (const auto &IE : DDG->getInEdges(SU)) {
2237 SUnit *PredSU = IE.getSrc();
2238 if (S && S->count(PredSU) == 0)
2239 continue;
2240 if (IE.ignoreDependence(true))
2241 continue;
2242 if (NodeOrder.count(PredSU) == 0)
2243 Preds.insert(PredSU);
2244 }
2245
2246
2247
2248
2249
2250 for (const auto &OE : DDG->getOutEdges(SU)) {
2251 SUnit *SuccSU = OE.getDst();
2252 if (!OE.isAntiDep())
2253 continue;
2254 if (S && S->count(SuccSU) == 0)
2255 continue;
2256 if (NodeOrder.count(SuccSU) == 0)
2257 Preds.insert(SuccSU);
2258 }
2259 }
2260 return !Preds.empty();
2261}
2262
2263
2264
2265
2268 const NodeSet *S = nullptr) {
2270
2272 for (const auto &OE : DDG->getOutEdges(SU)) {
2273 SUnit *SuccSU = OE.getDst();
2274 if (S && S->count(SuccSU) == 0)
2275 continue;
2276 if (OE.ignoreDependence(false))
2277 continue;
2278 if (NodeOrder.count(SuccSU) == 0)
2279 Succs.insert(SuccSU);
2280 }
2281
2282
2283
2284
2285
2286 for (const auto &IE : DDG->getInEdges(SU)) {
2287 SUnit *PredSU = IE.getSrc();
2288 if (!IE.isAntiDep())
2289 continue;
2290 if (S && S->count(PredSU) == 0)
2291 continue;
2292 if (NodeOrder.count(PredSU) == 0)
2293 Succs.insert(PredSU);
2294 }
2295 }
2296 return !Succs.empty();
2297}
2298
2299
2300
2307 return false;
2309 return false;
2310 if (DestNodes.contains(Cur))
2311 return true;
2312 if (!Visited.insert(Cur).second)
2313 return Path.contains(Cur);
2314 bool FoundPath = false;
2315 for (const auto &OE : DDG->getOutEdges(Cur))
2316 if (!OE.ignoreDependence(false))
2317 FoundPath |=
2318 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2319 for (const auto &IE : DDG->getInEdges(Cur))
2320 if (IE.isAntiDep() && IE.getDistance() == 0)
2321 FoundPath |=
2322 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2323 if (FoundPath)
2324 Path.insert(Cur);
2325 return FoundPath;
2326}
2327
2328
2329
2330
2337 for (SUnit *SU : NS) {
2339 if (MI->isPHI())
2340 continue;
2343 if (Reg.isVirtual())
2345 else if (MRI.isAllocatable(Reg))
2346 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
2348 }
2349 }
2350 for (SUnit *SU : NS)
2352 if (!MO.isDead()) {
2354 if (Reg.isVirtual()) {
2358 } else if (MRI.isAllocatable(Reg)) {
2359 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
2363 }
2364 }
2366}
2367
2368
2369
2370void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2371 for (auto &NS : NodeSets) {
2372
2373 if (NS.size() <= 2)
2374 continue;
2375 IntervalPressure RecRegPressure;
2376 RegPressureTracker RecRPTracker(RecRegPressure);
2377 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
2379 RecRPTracker.closeBottom();
2380
2381 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2382 llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
2383 return A->NodeNum > B->NodeNum;
2384 });
2385
2386 for (auto &SU : SUnits) {
2387
2388
2389
2390
2392 RecRPTracker.setPos(std::next(CurInstI));
2393
2394 RegPressureDelta RPDelta;
2396 RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
2397 CriticalPSets,
2401 dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
2404 NS.setExceedPressure(SU);
2405 break;
2406 }
2407 RecRPTracker.recede();
2408 }
2409 }
2410}
2411
2412
2413
2414void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2415 unsigned Colocate = 0;
2416 for (int i = 0, e = NodeSets.size(); i < e; ++i) {
2417 NodeSet &N1 = NodeSets[i];
2418 SmallSetVector<SUnit *, 8> S1;
2420 continue;
2421 for (int j = i + 1; j < e; ++j) {
2424 continue;
2425 SmallSetVector<SUnit *, 8> S2;
2426 if (N2.empty() || !succ_L(N2, S2, DDG.get()))
2427 continue;
2431 break;
2432 }
2433 }
2434 }
2435}
2436
2437
2438
2439
2440
2441
2442void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2443
2444 if (MII < 17)
2445 return;
2446
2447 for (auto &NS : NodeSets) {
2448 if (NS.getRecMII() > 2)
2449 return;
2450 if (NS.getMaxDepth() > MII)
2451 return;
2452 }
2453 NodeSets.clear();
2454 LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
2455}
2456
2457
2458
2459void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2460 SetVector<SUnit *> NodesAdded;
2461 SmallPtrSet<SUnit *, 8> Visited;
2462
2463
2464 for (NodeSet &I : NodeSets) {
2465 SmallSetVector<SUnit *, 8> N;
2466
2467 if (succ_L(I, N, DDG.get())) {
2468 SetVector<SUnit *> Path;
2469 for (SUnit *NI : N) {
2470 Visited.clear();
2471 computePath(NI, Path, NodesAdded, I, Visited, DDG.get());
2472 }
2473 if (!Path.empty())
2474 I.insert(Path.begin(), Path.end());
2475 }
2476
2477 N.clear();
2478 if (succ_L(NodesAdded, N, DDG.get())) {
2479 SetVector<SUnit *> Path;
2480 for (SUnit *NI : N) {
2481 Visited.clear();
2482 computePath(NI, Path, I, NodesAdded, Visited, DDG.get());
2483 }
2484 if (!Path.empty())
2485 I.insert(Path.begin(), Path.end());
2486 }
2488 }
2489
2490
2491
2493 SmallSetVector<SUnit *, 8> N;
2494 if (succ_L(NodesAdded, N, DDG.get()))
2496 addConnectedNodes(I, NewSet, NodesAdded);
2497 if (!NewSet.empty())
2498 NodeSets.push_back(NewSet);
2499
2500
2501
2503 if (pred_L(NodesAdded, N, DDG.get()))
2505 addConnectedNodes(I, NewSet, NodesAdded);
2506 if (!NewSet.empty())
2507 NodeSets.push_back(NewSet);
2508
2509
2510
2511 for (SUnit &SU : SUnits) {
2512 if (NodesAdded.count(&SU) == 0) {
2514 addConnectedNodes(&SU, NewSet, NodesAdded);
2515 if (!NewSet.empty())
2516 NodeSets.push_back(NewSet);
2517 }
2518 }
2519}
2520
2521
2522void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2523 SetVector<SUnit *> &NodesAdded) {
2525 NodesAdded.insert(SU);
2526 for (auto &OE : DDG->getOutEdges(SU)) {
2527 SUnit *Successor = OE.getDst();
2528 if (!OE.isArtificial() && !Successor->isBoundaryNode() &&
2529 NodesAdded.count(Successor) == 0)
2530 addConnectedNodes(Successor, NewSet, NodesAdded);
2531 }
2532 for (auto &IE : DDG->getInEdges(SU)) {
2533 SUnit *Predecessor = IE.getSrc();
2534 if (!IE.isArtificial() && NodesAdded.count(Predecessor) == 0)
2535 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2536 }
2537}
2538
2539
2540
2543 Result.clear();
2544 for (SUnit *SU : Set1) {
2545 if (Set2.count(SU) != 0)
2546 Result.insert(SU);
2547 }
2548 return !Result.empty();
2549}
2550
2551
2552void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2553 for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2554 ++I) {
2556 for (NodeSetType::iterator J = I + 1; J != E;) {
2561 for (SUnit *SU : *J)
2562 I->insert(SU);
2563 NodeSets.erase(J);
2564 E = NodeSets.end();
2565 } else {
2566 ++J;
2567 }
2568 }
2569 }
2570}
2571
2572
2573void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2574 for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2575 ++I)
2576 for (NodeSetType::iterator J = I + 1; J != E;) {
2577 J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
2578
2579 if (J->empty()) {
2580 NodeSets.erase(J);
2581 E = NodeSets.end();
2582 } else {
2583 ++J;
2584 }
2585 }
2586}
2587
2588
2589
2590
2591
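// Produce the final node order: process the node sets one at a time,
// alternating between top-down passes (by maximum height) and bottom-up
// passes (by maximum depth), using mobility as a tie-breaker.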
2592void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2593 SmallSetVector<SUnit *, 8> R;
2595
2596 for (auto &Nodes : NodeSets) {
2597 LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2598 OrderKind Order;
2599 SmallSetVector<SUnit *, 8> N;
2610
2611
2614 } else if (NodeSets.size() == 1) {
2615 for (const auto &N : Nodes)
2616 if (N->Succs.size() == 0)
2620 } else {
2621
2622 SUnit *maxASAP = nullptr;
2623 for (SUnit *SU : Nodes) {
2624 if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
2625 (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
2626 maxASAP = SU;
2627 }
2628 R.insert(maxASAP);
2631 }
2632
2633 while (!R.empty()) {
2634 if (Order == TopDown) {
2635
2636
2637
2638 while (!R.empty()) {
2639 SUnit *maxHeight = nullptr;
2640 for (SUnit *I : R) {
2641 if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
2642 maxHeight = I;
2643 else if (getHeight(I) == getHeight(maxHeight) &&
2644 getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
2645 maxHeight = I;
2646 else if (getHeight(I) == getHeight(maxHeight) &&
2647 getZeroLatencyHeight(I) ==
2648 getZeroLatencyHeight(maxHeight) &&
2649 getMOV(I) < getMOV(maxHeight))
2650 maxHeight = I;
2651 }
2654 R.remove(maxHeight);
2655 for (const auto &OE : DDG->getOutEdges(maxHeight)) {
2656 SUnit *SU = OE.getDst();
2657 if (Nodes.count(SU) == 0)
2658 continue;
2660 continue;
2661 if (OE.ignoreDependence(false))
2662 continue;
2663 R.insert(SU);
2664 }
2665
2666
2667
2668
2669
2670 for (const auto &IE : DDG->getInEdges(maxHeight)) {
2671 SUnit *SU = IE.getSrc();
2672 if (!IE.isAntiDep())
2673 continue;
2674 if (Nodes.count(SU) == 0)
2675 continue;
2677 continue;
2678 R.insert(SU);
2679 }
2680 }
2682 LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
2683 SmallSetVector<SUnit *, 8> N;
2686 } else {
2687
2688
2689
2690 while (!R.empty()) {
2691 SUnit *maxDepth = nullptr;
2692 for (SUnit *I : R) {
2693 if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
2694 maxDepth = I;
2695 else if (getDepth(I) == getDepth(maxDepth) &&
2696 getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
2697 maxDepth = I;
2698 else if (getDepth(I) == getDepth(maxDepth) &&
2699 getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
2700 getMOV(I) < getMOV(maxDepth))
2701 maxDepth = I;
2702 }
2705 R.remove(maxDepth);
2706 if (Nodes.isExceedSU(maxDepth)) {
2708 R.clear();
2709 R.insert(Nodes.getNode(0));
2710 break;
2711 }
2712 for (const auto &IE : DDG->getInEdges(maxDepth)) {
2713 SUnit *SU = IE.getSrc();
2714 if (Nodes.count(SU) == 0)
2715 continue;
2717 continue;
2718 R.insert(SU);
2719 }
2720
2721
2722
2723
2724
2725 for (const auto &OE : DDG->getOutEdges(maxDepth)) {
2726 SUnit *SU = OE.getDst();
2727 if (!OE.isAntiDep())
2728 continue;
2729 if (Nodes.count(SU) == 0)
2730 continue;
2732 continue;
2733 R.insert(SU);
2734 }
2735 }
2737 LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2738 SmallSetVector<SUnit *, 8> N;
2741 }
2742 }
2744 }
2745
2747 dbgs() << "Node order: ";
2749 dbgs() << " " << I->NodeNum << " ";
2750 dbgs() << "\n";
2751 });
2752}
2753
2754
2755
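// Try to find a modulo schedule: for each candidate II from MII up to MAX_II,
// place the nodes in the precomputed order using the earliest/latest start
// cycles implied by already-scheduled neighbors, and accept the first II for
// which every node can be placed (and, if enabled, register pressure stays
// within limits).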
2756bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2757
2759 LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
2760 return false;
2761 }
2762
2763 bool scheduleFound = false;
2764 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2766 HRPDetector =
2767 std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
2768 HRPDetector->init(RegClassInfo);
2769 }
2770
2771 for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2772 Schedule.reset();
2774 LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2775
2778 do {
2779 SUnit *SU = *NI;
2780
2781
2782
2783 int EarlyStart = INT_MIN;
2784 int LateStart = INT_MAX;
2785 Schedule.computeStart(SU, &EarlyStart, &LateStart, II, this);
2787 dbgs() << "\n";
2788 dbgs() << "Inst (" << SU->NodeNum << ") ";
2790 dbgs() << "\n";
2791 });
2793 dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2794
2795 if (EarlyStart > LateStart)
2796 scheduleFound = false;
2797 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2798 scheduleFound =
2799 Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);
2800 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2801 scheduleFound =
2802 Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);
2803 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2804 LateStart = std::min(LateStart, EarlyStart + (int)II - 1);
2805
2806
2807
2808
2809
2810
2813 scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
2814 else
2815 scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);
2816 } else {
2818 scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2819 FirstCycle + getASAP(SU) + II - 1, II);
2820 }
2821
2822
2823
2824 if (scheduleFound)
2827 scheduleFound = false;
2828
2830 if (!scheduleFound)
2831 dbgs() << "\tCan't schedule\n";
2832 });
2833 } while (++NI != NE && scheduleFound);
2834
2835
2836
2837 if (scheduleFound)
2838 scheduleFound = DDG->isValidSchedule(Schedule);
2839
2840
2841 if (scheduleFound)
2842 scheduleFound =
2844
2845
2846 if (scheduleFound)
2848
2849
2850
2852 scheduleFound =
2853 !HRPDetector->detect(this, Schedule, Schedule.getMaxStageCount());
2854 }
2855
2856 LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2858 << ")\n");
2859
2860 if (scheduleFound) {
2861 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
2862 if (!scheduleFound)
2864 }
2865
2866 if (scheduleFound) {
2868 Pass.ORE->emit([&]() {
2869 return MachineOptimizationRemarkAnalysis(
2870 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
2871 << "Schedule found with Initiation Interval: "
2873 << ", MaxStageCount: "
2875 });
2876 } else
2877 Schedule.reset();
2878
2880}
2881
2887 if (!Reg.isVirtual())
2889 if (MRI.getVRegDef(Reg)->getParent() != MI.getParent())
2890 continue;
2891 if (Result)
2893 Result = Reg;
2894 }
2895 return Result;
2896}
2897
2898
2899
2901 if (.isReg() ||
.getReg().isVirtual())
2902 return false;
2903
2908
2913
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925 while (true) {
2927 return false;
2929 if (Def->getParent() != LoopBB)
2930 return false;
2931
2932 if (Def->isCopy()) {
2933
2934 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2935 return false;
2936 CurReg = Def->getOperand(1).getReg();
2937 } else if (Def->isPHI()) {
2938
2939 if (Phi)
2940 return false;
2941 Phi = Def;
2943 } else if (TII->getIncrementValue(*Def, Value)) {
2944
2946
2947 return false;
2948
2951 bool OffsetIsScalable;
2952 if (TII->getMemOperandWithOffset(*Def, BaseOp, Offset, OffsetIsScalable,
2954
2955 CurReg = BaseOp->getReg();
2956 } else {
2957
2958
2961 return false;
2962 }
2964 } else {
2965 return false;
2966 }
2967 if (CurReg == OrgReg)
2968 break;
2969 }
2970
2972 return false;
2973
2974 return true;
2975}
2976
2977
2978
2979bool SwingSchedulerDAG::computeDelta(const MachineInstr &MI, int &Delta) const {
2980 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2981 const MachineOperand *BaseOp;
2983 bool OffsetIsScalable;
2985 return false;
2986
2987
2988 if (OffsetIsScalable)
2989 return false;
2990
2991 if (!BaseOp->isReg())
2992 return false;
2993
2995}
2996
2997
2998
2999
3000
3001
3002
3003
3004bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3005 unsigned &BasePos,
3006 unsigned &OffsetPos,
3009
3011 return false;
3012 unsigned BasePosLd, OffsetPosLd;
3014 return false;
3016
3017
3018 MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
3019 MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3020 if (!Phi || !Phi->isPHI())
3021 return false;
3022
3024 if (!PrevReg)
3025 return false;
3026
3027
3028 MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3029 if (!PrevDef || PrevDef == MI)
3030 return false;
3031
3033 return false;
3034
3035 unsigned BasePos1 = 0, OffsetPos1 = 0;
3037 return false;
3038
3039
3040
3041 int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3042 int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3043 MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3044 NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3046 MF.deleteMachineInstr(NewMI);
3047 if (!Disjoint)
3048 return false;
3049
3050
3051 BasePos = BasePosLd;
3052 OffsetPos = OffsetPosLd;
3053 NewBase = PrevReg;
3054 Offset = StoreOffset;
3055 return true;
3056}
3057
3058
3059
3064 InstrChanges.find(SU);
3065 if (It != InstrChanges.end()) {
3066 std::pair<Register, int64_t> RegAndOffset = It->second;
3067 unsigned BasePos, OffsetPos;
3068 if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3069 return;
3070 Register BaseReg = MI->getOperand(BasePos).getReg();
3071 MachineInstr *LoopDef = findDefInLoop(BaseReg);
3076 if (BaseStageNum < DefStageNum) {
3078 int OffsetDiff = DefStageNum - BaseStageNum;
3079 if (DefCycleNum < BaseCycleNum) {
3081 if (OffsetDiff > 0)
3082 --OffsetDiff;
3083 }
3084 int64_t NewOffset =
3085 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3089 NewMIs[MI] = NewMI;
3090 }
3091 }
3092}
3093
3094
3095
3096
3100 while (Def->isPHI()) {
3101 if (!Visited.insert(Def).second)
3102 break;
3103 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3104 if (Def->getOperand(i + 1).getMBB() == BB) {
3105 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3106 break;
3107 }
3108 }
3109 return Def;
3110}
3111
3112
3113
3116 int DeltaB, DeltaO, Delta;
3118 DeltaB != DeltaO)
3119 return true;
3120 Delta = DeltaB;
3121
3123 int64_t OffsetB, OffsetO;
3124 bool OffsetBIsScalable, OffsetOIsScalable;
3126 if (!TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3127 OffsetBIsScalable, TRI) ||
3128 !TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3129 OffsetOIsScalable, TRI))
3130 return true;
3131
3132 if (OffsetBIsScalable || OffsetOIsScalable)
3133 return true;
3134
3136
3137
3138
3139 if (!BaseOpB->isReg() || !BaseOpO->isReg())
3140 return true;
3142 if (!RegB.isVirtual() || !RegO.isVirtual())
3143 return true;
3144
3147 if (!DefB || !DefO || !DefB->isPHI() || !DefO->isPHI())
3148 return true;
3149
3158
3160 return true;
3161 }
3162
3165
3166
3167
3169 return true;
3170
3172 dbgs() << "Overlap check:\n";
3173 dbgs() << " BaseMI: ";
3174 BaseMI->dump();
3175 dbgs() << " Base + " << OffsetB << " + I * " << Delta
3176 << ", Len: " << AccessSizeB.getValue() << "\n";
3177 dbgs() << " OtherMI: ";
3178 OtherMI->dump();
3179 dbgs() << " Base + " << OffsetO << " + I * " << Delta
3180 << ", Len: " << AccessSizeO.getValue() << "\n";
3181 });
3182
3183
3184
3185
3186
3187 if (Delta < 0) {
3188 int64_t BaseMinAddr = OffsetB;
3189 int64_t OtherNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue() - 1;
3190 if (BaseMinAddr > OtherNextIterMaxAddr) {
3192 return false;
3193 }
3194 } else {
3195 int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue() - 1;
3196 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3197 if (BaseMaxAddr < OtherNextIterMinAddr) {
3199 return false;
3200 }
3201 }
3203 return true;
3204}
3205
3206
3207
3208
3211 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
3212 Edge.getDst()->isBoundaryNode())
3213 return false;
3214
3216 return true;
3217
3218 if (Edge.isOutputDep())
3219 return true;
3220
3222 MachineInstr *DI = Edge.getDst()->getInstr();
3223 assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3224
3225
3229 return true;
3230
3232 return false;
3233
3234
3235
3236
3238}
3239
3240void SwingSchedulerDAG::postProcessDAG() {
3241 for (auto &M : Mutations)
3242 M->apply(this);
3243}
3244
3245
3246
3247
3248
3249
3251 bool forward = true;
3253 dbgs() << "Trying to insert node between " << StartCycle << " and "
3254 << EndCycle << " II: " << II << "\n";
3255 });
3256 if (StartCycle > EndCycle)
3257 forward = false;
3258
3259
3260 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3261 for (int curCycle = StartCycle; curCycle != termCycle;
3262 forward ? ++curCycle : --curCycle) {
3263
3264 if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3265 ProcItinResources.canReserveResources(*SU, curCycle)) {
3267 dbgs() << "\tinsert at cycle " << curCycle << " ";
3269 });
3270
3271 if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
3272 ProcItinResources.reserveResources(*SU, curCycle);
3273 ScheduledInstrs[curCycle].push_back(SU);
3274 InstrToCycle.insert(std::make_pair(SU, curCycle));
3275 if (curCycle > LastCycle)
3276 LastCycle = curCycle;
3277 if (curCycle < FirstCycle)
3278 FirstCycle = curCycle;
3279 return true;
3280 }
3282 dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3284 });
3285 }
3286 return false;
3287}
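SMSchedule::insert scans candidate cycles in whichever direction StartCycle and EndCycle imply, stopping one past EndCycle. A minimal sketch of just that scan pattern, with a made-up availability predicate standing in for the DFA/itinerary check (findFreeCycle and IsFree are hypothetical names):

#include <iostream>

// Visit every candidate cycle between Start and End inclusive, in the
// direction implied by their order; return the first cycle accepted by IsFree.
static int findFreeCycle(int Start, int End, bool (*IsFree)(int)) {
  bool Forward = Start <= End;
  int Term = Forward ? End + 1 : End - 1;
  for (int Cur = Start; Cur != Term; Forward ? ++Cur : --Cur)
    if (IsFree(Cur))
      return Cur;
  return Term; // one past the window: no free slot found
}

int main() {
  auto Odd = [](int C) { return C % 2 != 0; };
  std::cout << findFreeCycle(2, 6, Odd) << '\n'; // 3: first odd cycle scanning up
  std::cout << findFreeCycle(6, 2, Odd) << '\n'; // 5: first odd cycle scanning down
}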
3288
3289
3295 int EarlyCycle = INT_MAX;
3296 while (!Worklist.empty()) {
3299 if (Visited.count(PrevSU))
3300 continue;
3301 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3302 if (it == InstrToCycle.end())
3303 continue;
3304 EarlyCycle = std::min(EarlyCycle, it->second);
3305 for (const auto &IE : DDG->getInEdges(PrevSU))
3306 if (IE.isOrderDep() || IE.isOutputDep())
3308 Visited.insert(PrevSU);
3309 }
3310 return EarlyCycle;
3311}
3312
3313
3319 int LateCycle = INT_MIN;
3320 while (!Worklist.empty()) {
3324 continue;
3325 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3326 if (it == InstrToCycle.end())
3327 continue;
3328 LateCycle = std::max(LateCycle, it->second);
3329 for (const auto &OE : DDG->getOutEdges(SuccSU))
3330 if (OE.isOrderDep() || OE.isOutputDep())
3332 Visited.insert(SuccSU);
3333 }
3334 return LateCycle;
3335}
3336
3337
3338
3339
3341 for (auto &P : SU->Preds)
3342 if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI())
3343 for (auto &S : P.getSUnit()->Succs)
3344 if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3345 return P.getSUnit();
3346 return nullptr;
3347}
3348
3349
3350
3354
3355
3356
3357
3358 for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3360 for (const auto &IE : DDG->getInEdges(SU)) {
3361 if (IE.getSrc() == I) {
3362
3363
3366 *MinLateStart = std::min(*MinLateStart, End);
3367 }
3368 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II;
3369 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3370 }
3371 }
3372
3373 for (const auto &OE : DDG->getOutEdges(SU)) {
3374 if (OE.getDst() == I) {
3375
3376
3379 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
3380 }
3381 int LateStart = cycle - OE.getLatency() + OE.getDistance() * II;
3382 *MinLateStart = std::min(*MinLateStart, LateStart);
3383 }
3384 }
3385
3387 for (const auto &Dep : SU->Preds) {
3388
3389
3390 if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3392 *MinLateStart = std::min(*MinLateStart, cycle);
3393 }
3394 }
3395 }
3396}
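computeStart narrows the legal window from already-scheduled neighbours: a scheduled predecessor at cycle c over an edge with latency L and distance D contributes EarlyStart = c + L - D*II, and a scheduled successor contributes LateStart = c - L + D*II. A short worked example with invented cycles, latencies, and II (EdgeInfo is just a local helper for this sketch):

#include <algorithm>
#include <climits>
#include <iostream>

struct EdgeInfo { int SrcOrDstCycle, Latency, Distance; };

int main() {
  const int II = 4; // hypothetical initiation interval

  // Predecessor scheduled at cycle 2, latency 3, same iteration (distance 0):
  // the node may not start before 2 + 3 - 0*4 = 5.
  EdgeInfo In{2, 3, 0};
  int MaxEarlyStart = INT_MIN;
  MaxEarlyStart = std::max(MaxEarlyStart,
                           In.SrcOrDstCycle + In.Latency - In.Distance * II);

  // Successor scheduled at cycle 9 reached through a loop-carried edge
  // (distance 1), latency 1: the node must start by 9 - 1 + 1*4 = 12.
  EdgeInfo Out{9, 1, 1};
  int MinLateStart = INT_MAX;
  MinLateStart = std::min(MinLateStart,
                          Out.SrcOrDstCycle - Out.Latency + Out.Distance * II);

  std::cout << "window: [" << MaxEarlyStart << ", " << MinLateStart << "]\n";
  // prints: window: [5, 12]
}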
3397
3398
3399
3400
3402 std::deque<SUnit *> &Insts) const {
3404 bool OrderBeforeUse = false;
3405 bool OrderAfterDef = false;
3406 bool OrderBeforeDef = false;
3407 unsigned MoveDef = 0;
3408 unsigned MoveUse = 0;
3411
3412 unsigned Pos = 0;
3413 for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3414 ++I, ++Pos) {
3416 if (!MO.isReg() || !MO.getReg().isVirtual())
3417 continue;
3418
3420 unsigned BasePos, OffsetPos;
3421 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3422 if (MI->getOperand(BasePos).getReg() == Reg)
3424 Reg = NewReg;
3425 bool Reads, Writes;
3426 std::tie(Reads, Writes) =
3427 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3428 if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3429 OrderBeforeUse = true;
3430 if (MoveUse == 0)
3431 MoveUse = Pos;
3432 } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3433
3434 OrderAfterDef = true;
3435 MoveDef = Pos;
3436 } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3438 OrderBeforeUse = true;
3439 if (MoveUse == 0)
3440 MoveUse = Pos;
3441 } else {
3442 OrderAfterDef = true;
3443 MoveDef = Pos;
3444 }
3445 } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3446 OrderBeforeUse = true;
3447 if (MoveUse == 0)
3448 MoveUse = Pos;
3449 if (MoveUse != 0) {
3450 OrderAfterDef = true;
3451 MoveDef = Pos - 1;
3452 }
3453 } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3454
3455 OrderBeforeUse = true;
3456 if (MoveUse == 0)
3457 MoveUse = Pos;
3458 } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3460 if (MoveUse == 0) {
3461 OrderBeforeDef = true;
3462 MoveUse = Pos;
3463 }
3464 }
3465 }
3466
3467
3469 if (OE.getDst() != *I)
3470 continue;
3471 if (OE.isOrderDep() && stageScheduled(*I) == StageInst1) {
3472 OrderBeforeUse = true;
3473 if (Pos < MoveUse)
3474 MoveUse = Pos;
3475 }
3476
3477
3478
3479 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3481 OrderBeforeUse = true;
3482 if ((MoveUse == 0) || (Pos < MoveUse))
3483 MoveUse = Pos;
3484 }
3485 }
3486 for (auto &IE : DDG->getInEdges(SU)) {
3487 if (IE.getSrc() != *I)
3488 continue;
3489 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3491 OrderAfterDef = true;
3492 MoveDef = Pos;
3493 }
3494 }
3495 }
3496
3497
3498 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3499 OrderBeforeUse = false;
3500
3501
3502
3503 if (OrderBeforeDef)
3504 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3505
3506
3507
3508 if (OrderBeforeUse && OrderAfterDef) {
3509 SUnit *UseSU = Insts.at(MoveUse);
3510 SUnit *DefSU = Insts.at(MoveDef);
3511 if (MoveUse > MoveDef) {
3512 Insts.erase(Insts.begin() + MoveUse);
3513 Insts.erase(Insts.begin() + MoveDef);
3514 } else {
3515 Insts.erase(Insts.begin() + MoveDef);
3516 Insts.erase(Insts.begin() + MoveUse);
3517 }
3521 return;
3522 }
3523
3524
3525 if (OrderBeforeUse)
3526 Insts.push_front(SU);
3527 else
3528 Insts.push_back(SU);
3529}
3530
3531
3534 if (!Phi.isPHI())
3535 return false;
3536 assert(Phi.isPHI() && "Expecting a Phi.");
3540
3543 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3544 SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3545 if (!UseSU)
3546 return true;
3548 return true;
3551 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3552}
3553
3554
3555
3556
3557
3558
3559
3560
3564 if (!MO.isReg())
3565 return false;
3566 if (Def->isPHI())
3567 return false;
3569 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3570 return false;
3572 return false;
3575 if (DMO.getReg() == LoopReg)
3576 return true;
3577 }
3578 return false;
3579}
3580
3581
3582
3585 for (const auto &IE : DDG->getInEdges(SU))
3586 if (InstrToCycle.count(IE.getSrc()))
3587 return false;
3588 return true;
3589}
3590
3591
3596
3597 for (auto &SU : SSD->SUnits)
3600
3602 while (!Worklist.empty()) {
3604 if (DoNotPipeline.count(SU))
3605 continue;
3607 DoNotPipeline.insert(SU);
3608 for (const auto &IE : DDG->getInEdges(SU))
3609 Worklist.push_back(IE.getSrc());
3610
3611
3612
3613 for (const auto &OE : DDG->getOutEdges(SU))
3614 if (OE.getDistance() == 1)
3615 Worklist.push_back(OE.getDst());
3616 }
3617 return DoNotPipeline;
3618}
3619
3620
3621
3625
3626 int NewLastCycle = INT_MIN;
3629 continue;
3631 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3632 continue;
3633 }
3634
3635
3638 if (IE.getDistance() == 0)
3639 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3640
3641
3642
3644 if (OE.getDistance() == 1)
3645 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3646
3647 int OldCycle = InstrToCycle[&SU];
3648 if (OldCycle != NewCycle) {
3649 InstrToCycle[&SU] = NewCycle;
3654 << ") is not pipelined; moving from cycle " << OldCycle
3655 << " to " << NewCycle << " Instr:" << *SU.getInstr());
3656 }
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680 if (FirstCycle + InitiationInterval <= NewCycle)
3681 return false;
3682
3683 NewLastCycle = std::max(NewLastCycle, NewCycle);
3684 }
3685 LastCycle = NewLastCycle;
3686 return true;
3687}
3688
3689
3690
3691
3692
3693
3694
3695
3696
3700 continue;
3702 int CycleDef = InstrToCycle[&SU];
3703 assert(StageDef != -1 && "Instruction should have been scheduled.");
3705 SUnit *Dst = OE.getDst();
3706 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3707 if (OE.getReg().isPhysical()) {
3709 return false;
3710 if (InstrToCycle[Dst] <= CycleDef)
3711 return false;
3712 }
3713 }
3714 }
3715 return true;
3716}
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3729
3730
3731 typedef std::pair<SUnit *, unsigned> UnitIndex;
3732 std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3733
3734 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3735 Indices.push_back(std::make_pair(NodeOrder[i], i));
3736
3737 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3738 return std::get<0>(i1) < std::get<0>(i2);
3739 };
3740
3741
3743
3744 bool Valid = true;
3745 (void)Valid;
3746
3747
3748
3749
3750
3751 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3753 unsigned Index = i;
3754
3755 bool PredBefore = false;
3756 bool SuccBefore = false;
3757
3760 (void)Succ;
3761 (void)Pred;
3762
3763 for (const auto &IE : DDG->getInEdges(SU)) {
3764 SUnit *PredSU = IE.getSrc();
3765 unsigned PredIndex = std::get<1>(
3766 *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
3767 if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3768 PredBefore = true;
3769 Pred = PredSU;
3770 break;
3771 }
3772 }
3773
3774 for (const auto &OE : DDG->getOutEdges(SU)) {
3775 SUnit *SuccSU = OE.getDst();
3776
3777
3778
3780 continue;
3781 unsigned SuccIndex = std::get<1>(
3782 *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
3783 if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3784 SuccBefore = true;
3785 Succ = SuccSU;
3786 break;
3787 }
3788 }
3789
3790 if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3791
3792
3794 Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3795 if (InCircuit)
3797 else {
3799 NumNodeOrderIssues++;
3801 }
3803 << " are scheduled before node " << SU->NodeNum
3804 << "\n");
3805 }
3806 }
3807
3809 if (!Valid)
3810 dbgs() << "Invalid node order found!\n";
3811 });
3812}
3813
3814
3815
3816
3817
3818
3819
3823 for (SUnit *SU : Instrs) {
3825 for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3827
3828
3829 if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3830
3831
3833 InstrChanges.find(SU);
3834 if (It != InstrChanges.end()) {
3835 unsigned BasePos, OffsetPos;
3836
3837 if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3840 int64_t NewOffset =
3841 MI->getOperand(OffsetPos).getImm() - It->second.second;
3845 NewMIs[MI] = NewMI;
3846 }
3847 }
3850 break;
3851 }
3852
3853
3854 unsigned TiedUseIdx = 0;
3855 if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3856
3857 OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3858
3859 NewBaseReg = MI->getOperand(i).getReg();
3860 break;
3861 }
3862 }
3863 }
3864}
3865
3866std::deque<SUnit *>
3868 const std::deque<SUnit *> &Instrs) const {
3869 std::deque<SUnit *> NewOrderPhi;
3870 for (SUnit *SU : Instrs) {
3872 NewOrderPhi.push_back(SU);
3873 }
3874 std::deque<SUnit *> NewOrderI;
3875 for (SUnit *SU : Instrs) {
3878 }
3880 return NewOrderPhi;
3881}
3882
3883
3884
3885
3887
3889 for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3890 ++stage) {
3891 std::deque<SUnit *> &cycleInstrs =
3892 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3894 ScheduledInstrs[cycle].push_front(SU);
3895 }
3896 }
3897
3898
3899
3900 for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3901 ScheduledInstrs.erase(cycle);
3902
3903
3904
3907
3908
3909
3911 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3914 }
3915
3917}
3918
3920 os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3921 << " depth " << MaxDepth << " col " << Colocate << "\n";
3922 for (const auto &I : Nodes)
3923 os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3924 os << "\n";
3925}
3926
3927#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3928
3930
3932
3934 for (SUnit *CI : cycleInstrs->second) {
3935 os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3936 os << "(" << CI->NodeNum << ") ";
3938 os << "\n";
3939 }
3940 }
3941}
3942
3943
3946
3947void ResourceManager::dumpMRT() const {
3949 if (UseDFA)
3950 return;
3951 std::stringstream SS;
3952 SS << "MRT:\n";
3953 SS << std::setw(4) << "Slot";
3954 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
3955 SS << std::setw(3) << I;
3956 SS << std::setw(7) << "#Mops"
3957 << "\n";
3958 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
3959 SS << std::setw(4) << Slot;
3960 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
3961 SS << std::setw(3) << MRT[Slot][I];
3962 SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
3963 }
3964 dbgs() << SS.str();
3965 });
3966}
3967#endif
3968
3971 unsigned ProcResourceID = 0;
3972
3973
3974
3975 assert(SM.getNumProcResourceKinds() < 64 &&
3976 "Too many kinds of resources, unsupported");
3977
3978
3979 Masks.resize(SM.getNumProcResourceKinds());
3980 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3982 if (Desc.SubUnitsIdxBegin)
3983 continue;
3984 Masks[I] = 1ULL << ProcResourceID;
3985 ProcResourceID++;
3986 }
3987
3988 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3990 if (!Desc.SubUnitsIdxBegin)
3991 continue;
3992 Masks[I] = 1ULL << ProcResourceID;
3993 for (unsigned U = 0; U < Desc.NumUnits; ++U)
3994 Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
3995 ProcResourceID++;
3996 }
3999 dbgs() << "ProcResourceDesc:\n";
4000 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
4002 dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
4003 ProcResource->Name, I, Masks[I],
4005 }
4006 dbgs() << " -----------------\n";
4007 }
4008 });
4009}
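initProcResourceVectors gives every non-group resource its own bit and then forms a group's mask as its own bit ORed with the bits of its sub-units. The toy model below (two leaf resources plus one group containing both; all indices and counts invented) reproduces that two-pass construction:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical model: indices 1 and 2 are leaf resources, index 3 is a group
  // whose sub-units are {1, 2}. Index 0 is unused, as in MCSchedModel.
  std::vector<std::vector<unsigned>> SubUnits = {{}, {}, {}, {1, 2}};
  std::vector<uint64_t> Masks(SubUnits.size(), 0);

  unsigned ProcResourceID = 0;
  for (unsigned I = 1; I < SubUnits.size(); ++I)   // leaves first
    if (SubUnits[I].empty())
      Masks[I] = 1ULL << ProcResourceID++;
  for (unsigned I = 1; I < SubUnits.size(); ++I) { // then groups
    if (SubUnits[I].empty())
      continue;
    Masks[I] = 1ULL << ProcResourceID++;
    for (unsigned U : SubUnits[I])
      Masks[I] |= Masks[U];
  }

  for (unsigned I = 1; I < SubUnits.size(); ++I)
    std::printf("resource %u: mask 0x%llx\n", I, (unsigned long long)Masks[I]);
  // prints masks 0x1 and 0x2 for the leaves and 0x7 for the group.
}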
4010
4014 dbgs() << "canReserveResources:\n";
4015 });
4016 if (UseDFA)
4017 return DFAResources[positiveModulo(Cycle, InitiationInterval)]
4019
4021 if (!SCDesc->isValid()) {
4023 dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4025 });
4026 return true;
4027 }
4028
4029 reserveResources(SCDesc, Cycle);
4030 bool Result = !isOverbooked();
4031 unreserveResources(SCDesc, Cycle);
4032
4034 return Result;
4035}
4036
4037void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
4040 dbgs() << "reserveResources:\n";
4041 });
4042 if (UseDFA)
4043 return DFAResources[positiveModulo(Cycle, InitiationInterval)]
4045
4047 if (!SCDesc->isValid()) {
4049 dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4051 });
4052 return;
4053 }
4054
4055 reserveResources(SCDesc, Cycle);
4056
4059 dumpMRT();
4060 dbgs() << "reserveResources: done!\n\n";
4061 }
4062 });
4063}
4064
4065void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
4070 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
4071 ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
4072
4074 ++NumScheduledMops[positiveModulo(C, InitiationInterval)];
4075}
4076
4077void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
4082 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
4083 --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
4084
4086 --NumScheduledMops[positiveModulo(C, InitiationInterval)];
4087}
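Both helpers above index the modulo reservation table through positiveModulo(C, InitiationInterval), so a reservation that runs past the end of the II window wraps back to slot 0. A small sketch of that wrap-around, with positiveModulo re-implemented locally for the example and a single-resource table (all values invented):

#include <iostream>
#include <vector>

static int positiveModulo(int A, int B) {
  int R = A % B;
  return R < 0 ? R + B : R;
}

int main() {
  const int II = 3;
  std::vector<int> MRT(II, 0); // one resource, one row per modulo slot

  // Reserve the resource for 2 cycles starting at cycle 5: occupies cycles 5
  // and 6, i.e. modulo slots 2 and 0 of the II=3 table.
  for (int C = 5; C < 5 + 2; ++C)
    ++MRT[positiveModulo(C, II)];

  for (int Slot = 0; Slot < II; ++Slot)
    std::cout << "slot " << Slot << ": " << MRT[Slot] << '\n';
  // slot 0: 1, slot 1: 0, slot 2: 1
}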
4088
4089bool ResourceManager::isOverbooked() const {
4091 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
4092 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
4093 const MCProcResourceDesc *Desc = SM.getProcResource(I);
4094 if (MRT[Slot][I] > Desc->NumUnits)
4095 return true;
4096 }
4097 if (NumScheduledMops[Slot] > IssueWidth)
4098 return true;
4099 }
4100 return false;
4101}
4102
4103int ResourceManager::calculateResMIIDFA() const {
4105
4106
4107
4108 FuncUnitSorter FUS = FuncUnitSorter(*ST);
4109 for (SUnit &SU : DAG->SUnits)
4110 FUS.calcCriticalResources(*SU.getInstr());
4111 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
4112 FuncUnitOrder(FUS);
4113
4114 for (SUnit &SU : DAG->SUnits)
4115 FuncUnitOrder.push(SU.getInstr());
4116
4120
4121 while (!FuncUnitOrder.empty()) {
4122 MachineInstr *MI = FuncUnitOrder.top();
4123 FuncUnitOrder.pop();
4125 continue;
4126
4127
4128
4130 unsigned ReservedCycles = 0;
4131 auto *RI = Resources.begin();
4132 auto *RE = Resources.end();
4134 dbgs() << "Trying to reserve resource for " << NumCycles
4135 << " cycles for \n";
4137 });
4138 for (unsigned C = 0; C < NumCycles; ++C)
4139 while (RI != RE) {
4140 if ((*RI)->canReserveResources(*MI)) {
4141 (*RI)->reserveResources(*MI);
4142 ++ReservedCycles;
4143 break;
4144 }
4145 RI++;
4146 }
4147 LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
4148 << ", NumCycles:" << NumCycles << "\n");
4149
4150 for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
4152 << "NewResource created to reserve resources"
4153 << "\n");
4155 assert(NewResource->canReserveResources(*MI) && "Reserve error.");
4156 NewResource->reserveResources(*MI);
4157 Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4158 }
4159 }
4160
4161 int Resmii = Resources.size();
4162 LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
4163 return Resmii;
4164}
4165
4167 if (UseDFA)
4168 return calculateResMIIDFA();
4169
4170
4171
4172
4173 int NumMops = 0;
4175 for (SUnit &SU : DAG->SUnits) {
4177 continue;
4178
4181 continue;
4182
4185 DAG->dumpNode(SU);
4187 << " WriteProcRes: ";
4188 }
4189 });
4192 make_range(STI->getWriteProcResBegin(SCDesc),
4193 STI->getWriteProcResEnd(SCDesc))) {
4197 SM.getProcResource(PRE.ProcResourceIdx);
4198 dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";
4199 }
4200 });
4201 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4202 }
4204 }
4205
4206 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4209 dbgs() << "#Mops: " << NumMops << ", "
4210 << "IssueWidth: " << IssueWidth << ", "
4211 << "Cycles: " << Result << "\n";
4212 });
4213
4216 std::stringstream SS;
4217 SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)
4218 << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"
4219 << "\n";
4220 dbgs() << SS.str();
4221 }
4222 });
4223 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
4225 int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;
4228 std::stringstream SS;
4229 SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
4230 << Desc->NumUnits << std::setw(10) << ResourceCount[I]
4231 << std::setw(10) << Cycles << "\n";
4232 dbgs() << SS.str();
4233 }
4234 });
4235 if (Cycles > Result)
4236 Result = Cycles;
4237 }
4238 return Result;
4239}
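In the non-DFA path the resource MII is the larger of ceil(#Mops / IssueWidth) and, per resource, ceil(consumed cycles / NumUnits). A worked example with invented counts:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical loop body: 10 micro-ops on a 4-wide machine,
  // 6 cycles of load/store work on 2 LSUs, 10 cycles of ALU work on 3 ALUs.
  int NumMops = 10, IssueWidth = 4;
  int Result = (NumMops + IssueWidth - 1) / IssueWidth; // ceil(10/4) = 3

  struct { int Consumed, Units; } Res[] = {{6, 2}, {10, 3}};
  for (auto &R : Res) {
    int Cycles = (R.Consumed + R.Units - 1) / R.Units;  // ceil per resource
    Result = std::max(Result, Cycles);
  }
  std::printf("ResMII = %d\n", Result);
  // ceil(10/4)=3, ceil(6/2)=3, ceil(10/3)=4 -> ResMII = 4
}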
4240
4242 InitiationInterval = II;
4243 DFAResources.clear();
4244 DFAResources.resize(II);
4245 for (auto &I : DFAResources)
4246 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4247 MRT.clear();
4249 NumScheduledMops.clear();
4250 NumScheduledMops.resize(II);
4251}
4252
4254 if (Pred.isArtificial() || Dst->isBoundaryNode())
4255 return true;
4256
4257
4258
4259 return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0);
4260}
4261
4262SwingSchedulerDDG::SwingSchedulerDDGEdges &
4263SwingSchedulerDDG::getEdges(const SUnit *SU) {
4264 if (SU == EntrySU)
4265 return EntrySUEdges;
4266 if (SU == ExitSU)
4267 return ExitSUEdges;
4268 return EdgesVec[SU->NodeNum];
4269}
4270
4271const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4272SwingSchedulerDDG::getEdges(const SUnit *SU) const {
4273 if (SU == EntrySU)
4274 return EntrySUEdges;
4275 if (SU == ExitSU)
4276 return ExitSUEdges;
4277 return EdgesVec[SU->NodeNum];
4278}
4279
4280void SwingSchedulerDDG::addEdge(const SUnit *SU,
4281 const SwingSchedulerDDGEdge &Edge) {
4283 "Validation-only edges are not expected here.");
4284 auto &Edges = getEdges(SU);
4285 if (Edge.getSrc() == SU)
4286 Edges.Succs.push_back(Edge);
4287 else
4288 Edges.Preds.push_back(Edge);
4289}
4290
4291void SwingSchedulerDDG::initEdges(SUnit *SU) {
4292 for (const auto &PI : SU->Preds) {
4293 SwingSchedulerDDGEdge Edge(SU, PI, false,
4294 false);
4296 }
4297
4298 for (const auto &SI : SU->Succs) {
4299 SwingSchedulerDDGEdge Edge(SU, SI, true,
4300 false);
4302 }
4303}
4304
4307 : EntrySU(EntrySU), ExitSU(ExitSU) {
4308 EdgesVec.resize(SUnits.size());
4309
4310
4311 initEdges(EntrySU);
4312 initEdges(ExitSU);
4313 for (auto &SU : SUnits)
4314 initEdges(&SU);
4315
4316
4317 for (SUnit &SU : SUnits) {
4318 SUnit *Src = &SU;
4321 Base.setLatency(1);
4322 for (SUnit *Dst : *OD) {
4324 true);
4325 Edge.setDistance(1);
4326 ValidationOnlyEdges.push_back(Edge);
4327 }
4328 }
4329 }
4330}
4331
4332const SwingSchedulerDDG::EdgesType &
4334 return getEdges(SU).Preds;
4335}
4336
4337const SwingSchedulerDDG::EdgesType &
4339 return getEdges(SU).Succs;
4340}
4341
4342
4345
4346 auto ExpandCycle = [&](SUnit *SU) {
4349 return Cycle + (Stage * II);
4350 };
4351
4353 SUnit *Src = Edge.getSrc();
4354 SUnit *Dst = Edge.getDst();
4355 if (!Src->isInstr() || !Dst->isInstr())
4356 continue;
4357 int CycleSrc = ExpandCycle(Src);
4358 int CycleDst = ExpandCycle(Dst);
4359 int MaxLateStart = CycleDst + Edge.getDistance() * II - Edge.getLatency();
4360 if (CycleSrc > MaxLateStart) {
4362 dbgs() << "Validation failed for edge from " << Src->NodeNum << " to "
4363 << Dst->NodeNum << "\n";
4364 });
4365 return false;
4366 }
4367 }
4368 return true;
4369}
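The validation above expands each endpoint to an absolute cycle, Cycle + Stage * II, and rejects the schedule when the source starts later than CycleDst + Distance * II - Latency. A short numeric sketch with invented placements:

#include <cstdio>

int main() {
  const int II = 5;
  // Hypothetical placements: Src at cycle 1, stage 1; Dst at cycle 3, stage 0.
  int CycleSrc = 1 + 1 * II; // 6
  int CycleDst = 3 + 0 * II; // 3
  // Loop-carried edge (distance 1) with latency 2.
  int MaxLateStart = CycleDst + 1 * II - 2; // 6
  std::printf("%s\n", CycleSrc > MaxLateStart ? "invalid" : "ok"); // ok (6 <= 6)
}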
4370
4373 for (SUnit &SU : SUnits) {
4374 SUnit *Src = &SU;
4379 SUnit *From = Src;
4380 SUnit *To = Dst;
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4402 !TII->isGlobalMemoryObject(FromMI) &&
4403 !TII->isGlobalMemoryObject(ToMI) && !isSuccOrder(From, To)) {
4404 SDep Pred = Dep;
4407 }
4408 }
4409 }
4410 }
4411}
4412
4416
4417 if (!Order)
4418 return;
4419
4420 const auto DumpSU = [](const SUnit *SU) {
4421 std::ostringstream OSS;
4422 OSS << "SU(" << SU->NodeNum << ")";
4423 return OSS.str();
4424 };
4425
4426 dbgs() << " Loop carried edges from " << DumpSU(SU) << "\n"
4427 << " Order\n";
4428 for (SUnit *Dst : *Order)
4429 dbgs() << " " << DumpSU(Dst) << "\n";
4430}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
A common definition of LaneBitmask for use in TableGen and CodeGen.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
print mir2vec MIR2Vec Vocabulary Printer Pass
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool hasPHICycleDFS(unsigned Reg, const DenseMap< unsigned, SmallVector< unsigned, 2 > > &PhiDeps, SmallSet< unsigned, 8 > &Visited, SmallSet< unsigned, 8 > &RecStack)
Depth-first search to detect cycles among PHI dependencies.
Definition MachinePipeliner.cpp:490
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition MachinePipeliner.cpp:963
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
Definition MachinePipeliner.cpp:2331
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
Definition MachinePipeliner.cpp:1405
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the loop block.
Definition MachinePipeliner.cpp:979
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
Definition MachinePipeliner.cpp:2541
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
Definition MachinePipeliner.cpp:2900
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allowed in the target loop."), cl::Hidden, cl::init(200))
A command line argument to limit the number of store instructions in the target basic block.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
Definition MachinePipeliner.cpp:988
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated schedule."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool hasPHICycle(const MachineBasicBlock *LoopHeader, const MachineRegisterInfo &MRI)
Definition MachinePipeliner.cpp:516
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
Definition MachinePipeliner.cpp:2230
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD, bool PerformCheapCheck)
Returns true if there is a loop-carried order dependency from Src to Dst.
Definition MachinePipeliner.cpp:1055
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
Definition MachinePipeliner.cpp:2301
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Definition MachinePipeliner.cpp:2266
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
Definition MachinePipeliner.cpp:3340
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Definition MachinePipeliner.cpp:2882
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
Definition MachinePipeliner.cpp:276
void computeDependencies()
The main function to compute loop-carried order-dependencies.
Definition MachinePipeliner.cpp:1130
const BitVector & getLoopCarried(unsigned Idx) const
Definition MachinePipeliner.cpp:335
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition MachinePipeliner.cpp:1124
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
Definition MachinePipeliner.cpp:363
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition MachinePipeliner.cpp:698
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
Definition MachinePipeliner.cpp:3919
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Definition MachinePipeliner.cpp:3945
AnalysisType & getAnalysis() const
getAnalysis() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
Definition MachinePipeliner.cpp:4166
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Definition MachinePipeliner.cpp:3969
void init(int II)
Initialize resources with the initiation interval II.
Definition MachinePipeliner.cpp:4241
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Definition MachinePipeliner.cpp:4011
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
Definition MachinePipeliner.cpp:3867
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
Definition MachinePipeliner.cpp:3944
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
Definition MachinePipeliner.cpp:3250
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
Definition MachinePipeliner.cpp:3290
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
Definition MachinePipeliner.cpp:3929
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
Definition MachinePipeliner.cpp:3583
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
Definition MachinePipeliner.cpp:3401
bool isValidSchedule(SwingSchedulerDAG *SSD)
Definition MachinePipeliner.cpp:3697
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
Definition MachinePipeliner.cpp:3561
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
Definition MachinePipeliner.cpp:3592
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
Definition MachinePipeliner.cpp:3351
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Definition MachinePipeliner.cpp:3622
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
Definition MachinePipeliner.cpp:3532
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
Definition MachinePipeliner.cpp:3314
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Definition MachinePipeliner.cpp:3886
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
typename vector_type::const_iterator iterator
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Definition MachinePipeliner.cpp:3060
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
Definition MachinePipeliner.cpp:952
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Definition MachinePipeliner.cpp:3820
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
Definition MachinePipeliner.cpp:3209
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
Definition MachinePipeliner.cpp:762
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Definition MachinePipeliner.cpp:3114
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
Definition MachinePipeliner.cpp:4253
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
Definition MachinePipeliner.cpp:4305
const EdgesType & getInEdges(const SUnit *SU) const
Definition MachinePipeliner.cpp:4333
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
Definition MachinePipeliner.cpp:4343
const EdgesType & getOutEdges(const SUnit *SU) const
Definition MachinePipeliner.cpp:4338
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Wrapper class representing a virtual register or register unit.
constexpr bool isVirtualReg() const
constexpr MCRegUnit asMCRegUnit() const
constexpr Register asVirtualReg() const
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
@ Valid
The data is already valid.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
std::set< NodeId > NodeSet
friend class Instruction
Iterator for Instructions in a `BasicBlock.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
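An illustrative sketch of the range-based wrapper (helper name and latency framing are assumptions):
#include "llvm/ADT/STLExtras.h"
#include <vector>
// Hypothetical helper: true if any latency in the list exceeds Limit.
static bool exceedsLatency(const std::vector<int> &Latencies, int Limit) {
  return llvm::any_of(Latencies, [Limit](int L) { return L > Limit; });
}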
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm instead of SMS algorithm.
@ WS_On
Use window algorithm after SMS algorithm fails.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
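A minimal sketch of how dbgs() is usually paired with LLVM_DEBUG and a DEBUG_TYPE, so output appears only in debug builds under -debug-only=pipeliner (the helper itself is hypothetical):
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "pipeliner"
// Hypothetical helper: trace a candidate initiation interval.
static void traceII(int II) {
  LLVM_DEBUG(llvm::dbgs() << "candidate II = " << II << "\n");
}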
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< bool > SwpEnableCopyToPhi
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
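A brief sketch (assuming an already-sorted std::vector) of the range form, which returns an iterator to the first element not less than the key:
#include "llvm/ADT/STLExtras.h"
#include <vector>
// Hypothetical helper: first element >= Key in a sorted vector.
static std::vector<int>::const_iterator
firstAtLeast(const std::vector<int> &Sorted, int Key) {
  return llvm::lower_bound(Sorted, Key);
}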
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
Definition MachinePipeliner.cpp:233
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
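A small sketch of the linear membership check (the helper and the SmallVector element type are assumptions):
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
// Hypothetical helper: has this register number already been recorded?
static bool alreadyRecorded(const llvm::SmallVector<unsigned, 8> &Regs,
                            unsigned Reg) {
  return llvm::is_contained(Regs, Reg);
}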
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
@ Increment
Incrementally increasing token ID.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
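A sketch combining the two entries above (the helper name is an assumption, and header locations may differ between LLVM versions): walk a pointer back to its underlying objects and check that each one is a distinct, identifiable object.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Value.h"
// Hypothetical helper: true iff every underlying object of Ptr is identified.
static bool allObjectsIdentified(const llvm::Value *Ptr) {
  llvm::SmallVector<const llvm::Value *, 4> Objs;
  llvm::getUnderlyingObjects(Ptr, Objs);
  for (const llvm::Value *Obj : Objs)
    if (!llvm::isIdentifiedObject(Obj))
      return false;
  return !Objs.empty();
}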
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
Definition MachinePipeliner.cpp:248
SUnit * SU
Definition MachinePipeliner.cpp:249
const Value * MemOpValue
The value of a memory operand.
Definition MachinePipeliner.cpp:253
SmallVector< const Value *, 2 > UnderlyingObjs
Definition MachinePipeliner.cpp:250
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
Definition MachinePipeliner.cpp:1019
AAMDNodes AATags
Definition MachinePipeliner.cpp:258
int64_t MemOpOffset
The offset of a memory operand.
Definition MachinePipeliner.cpp:256
bool IsAllIdentified
True if all the underlying objects are identified.
Definition MachinePipeliner.cpp:261
SUnitWithMemInfo(SUnit *SU)
Definition MachinePipeliner.cpp:1009
bool isUnknown() const
Definition MachinePipeliner.cpp:267
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
Definition MachinePipeliner.cpp:4371
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Definition MachinePipeliner.cpp:4413
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.