LLVM: lib/Target/AMDGPU/SIMachineScheduler.cpp Source File
//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI Machine Scheduler interface
//
//===----------------------------------------------------------------------===//

#include "SIMachineScheduler.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

#ifndef NDEBUG

static const char *getReasonStr(SIScheduleCandReason Reason) {
  switch (Reason) {
  case NoCand:      return "NOCAND";
  case RegUsage:    return "REGUSAGE";
  case Latency:     return "LATENCY";
  case Successor:   return "SUCCESSOR";
  case Depth:       return "DEPTH";
  case NodeOrder:   return "ORDER";
  }
  llvm_unreachable("Unknown reason!");
}

#endif

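// Candidate tie-breaking helpers shared by the in-block scheduler and the
// block scheduler below. They follow the usual MachineScheduler pattern:
// the first criterion that separates TryCand from Cand decides, the winner
// records why in TryCand.Reason, and a loss downgrades Cand.Reason so that
// weaker, later criteria cannot override an earlier decision.
// Typical call site (sketch, mirroring tryCandidateLatency further down):
//   if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand,
//                           Depth))
//     return true;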
namespace SISched {
static bool tryLess(int TryVal, int CandVal,
                    SISchedulerCandidate &TryCand,
                    SISchedulerCandidate &Cand,
                    SIScheduleCandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  Cand.setRepeat(Reason);
  return false;
}

static bool tryGreater(int TryVal, int CandVal,
                       SISchedulerCandidate &TryCand,
                       SISchedulerCandidate &Cand,
                       SIScheduleCandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  Cand.setRepeat(Reason);
  return false;
}
} // end namespace SISched


void SIScheduleBlock::addUnit(SUnit *SU) {
  NodeNum2Index[SU->NodeNum] = SUnits.size();
  SUnits.push_back(SU);
}

#ifndef NDEBUG
void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {

  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  dbgs() << '\n';
}
#endif

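// In-block (top-down) pick order: keep SGPR usage under control when it is
// already high, prefer instructions that are not waiting on an unwaited low
// latency parent, then low latency instructions themselves (lowest memory
// offset first), then lower SGPR usage, and finally the original order.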
void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
                                          SISchedCandidate &TryCand) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  if (Cand.SGPRUsage > 60 &&
      SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
                       TryCand, Cand, RegUsage))
    return;

  // Schedule low latency instructions as top as possible: first the
  // instructions that do not depend on a low latency instruction we have
  // not waited for yet, then the low latency instructions themselves.
  if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
                       Cand.HasLowLatencyNonWaitedParent,
                       TryCand, Cand, Depth))
    return;

  if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
                          TryCand, Cand, Depth))
    return;

  if (TryCand.IsLowLatency &&
      SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
                       TryCand, Cand, Depth))
    return;

  if (SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
                       TryCand, Cand, RegUsage))
    return;

  // Fall back to the original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
    TryCand.Reason = NodeOrder;
  }
}

SUnit* SIScheduleBlock::pickNode() {
  SISchedCandidate TopCand;

  for (SUnit* SU : TopReadySUs) {
    SISchedCandidate TryCand;
    std::vector<unsigned> pressure;
    std::vector<unsigned> MaxPressure;

    // Predict register usage after this instruction.
    TryCand.SU = SU;
    TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
    TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
    TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
    TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
    TryCand.HasLowLatencyNonWaitedParent =
      HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
    tryCandidateTopDown(TopCand, TryCand);
    if (TryCand.Reason != NoCand)
      TopCand.setBest(TryCand);
  }

  return TopCand.SU;
}


// Produce a quick, dependency-only schedule; used to compute the block's
// register liveness before the real intra-block scheduling happens.
void SIScheduleBlock::fastSchedule() {
  TopReadySUs.clear();
  if (Scheduled)
    undoSchedule();

  for (SUnit* SU : SUnits) {
    if (!SU->NumPredsLeft)
      TopReadySUs.push_back(SU);
  }

  while (!TopReadySUs.empty()) {
    SUnit *SU = TopReadySUs[0];
    ScheduledSUnits.push_back(SU);
    nodeScheduled(SU);
  }

  Scheduled = true;
}

// Returns whether Reg is defined between First and Last.
static bool isDefBetween(unsigned Reg,
                         SlotIndex First, SlotIndex Last,
                         const MachineRegisterInfo *MRI,
                         const LiveIntervals *LIS) {
  for (MachineRegisterInfo::def_instr_iterator
       UI = MRI->def_instr_begin(Reg),
       UE = MRI->def_instr_end(); UI != UE; ++UI) {
    const MachineInstr* MI = &*UI;
    if (MI->isDebugValue())
      continue;
    SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
    if (InstSlot >= First && InstSlot <= Last)
      return true;
  }
  return false;
}

void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
                                      MachineBasicBlock::iterator EndBlock) {
  IntervalPressure Pressure, BotPressure;
  RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
  LiveIntervals *LIS = DAG->getLIS();
  MachineRegisterInfo *MRI = DAG->getMRI();
  DAG->initRPTracker(TopRPTracker);
  DAG->initRPTracker(BotRPTracker);
  DAG->initRPTracker(RPTracker);

  // Goes through all SU. RPTracker captures what had to be alive for the SUs
  // to execute, and what is still alive at the end.
  for (SUnit* SU : ScheduledSUnits) {
    RPTracker.setPos(SU->getInstr());
    RPTracker.advance();
  }

  // Close the RPTracker to finalize live ins/outs.
  RPTracker.closeRegion();

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Do not track physical registers.
  for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
    if (RegMaskPair.RegUnit.isVirtual())
      LiveInRegs.insert(RegMaskPair.RegUnit);
  }
  LiveOutRegs.clear();

  // Only virtual registers that are actually defined inside the block are
  // recorded as its live outs.
  for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
    Register Reg = RegMaskPair.RegUnit;
    if (Reg.isVirtual() &&
        isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
                     LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
                     LIS)) {
      LiveOutRegs.insert(Reg);
    }
  }

  // Pressure = sum_alive_registers register size
  // Internally llvm will represent some registers as big 128 bit registers
  // for example, but they actually correspond to 4 actual 32 bit registers.
  // Thus Pressure is not equal to num_alive_registers * constant.
  LiveInPressure = TopPressure.MaxSetPressure;
  LiveOutPressure = BotPressure.MaxSetPressure;

  // Prepares TopRPTracker for top down scheduling.
  TopRPTracker.closeTop();
}

void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
                               MachineBasicBlock::iterator EndBlock) {
  if (!Scheduled)
    fastSchedule();

  // PreScheduling phase to set LiveIn and LiveOut.
  initRegPressure(BeginBlock, EndBlock);
  undoSchedule();

  // Schedule for real now.

  TopReadySUs.clear();

  for (SUnit* SU : SUnits) {
    if (!SU->NumPredsLeft)
      TopReadySUs.push_back(SU);
  }

  while (!TopReadySUs.empty()) {
    SUnit *SU = pickNode();
    ScheduledSUnits.push_back(SU);
    TopRPTracker.setPos(SU->getInstr());
    TopRPTracker.advance();
    nodeScheduled(SU);
  }

  // TODO: compute InternalAdditionalPressure.
  InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());

  // Check everything is right.
#ifndef NDEBUG
  assert(SUnits.size() == ScheduledSUnits.size() &&
         TopReadySUs.empty());
  for (SUnit* SU : SUnits) {
    assert(SU->isScheduled &&
           SU->NumPredsLeft == 0);
  }
#endif

  Scheduled = true;
}

void SIScheduleBlock::undoSchedule() {
  for (SUnit* SU : SUnits) {
    SU->isScheduled = false;
    for (SDep& Succ : SU->Succs) {
      if (BC->isSUInBlock(Succ.getSUnit(), ID))
        undoReleaseSucc(SU, &Succ);
    }
  }
  HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
  ScheduledSUnits.clear();
  Scheduled = false;
}

void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    ++SuccSU->WeakPredsLeft;
    return;
  }
  ++SuccSU->NumPredsLeft;
}

void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    DAG->dumpNode(*SuccSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif

  --SuccSU->NumPredsLeft;
}

// Release the successors of SU that are on the side (inside or outside the
// block) selected by InOrOutBlock.
void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
  for (SDep& Succ : SU->Succs) {
    SUnit *SuccSU = Succ.getSUnit();

    if (SuccSU->NodeNum >= DAG->SUnits.size())
      continue;

    if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
      continue;

    releaseSucc(SU, &Succ);
    if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
      TopReadySUs.push_back(SuccSU);
  }
}

void SIScheduleBlock::nodeScheduled(SUnit *SU) {
  // Is in TopReadySUs
  assert(!SU->NumPredsLeft);
  std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
  if (I == TopReadySUs.end()) {
    dbgs() << "Data Structure Bug in SI Scheduler\n";
    llvm_unreachable(nullptr);
  }
  TopReadySUs.erase(I);

  releaseSuccessors(SU, true);
  // Scheduling this node will trigger a wait,
  // thus propagate to other instructions that they do not need to wait either.
  if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
    HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);

  if (DAG->IsLowLatencySU[SU->NodeNum]) {
    for (SDep& Succ : SU->Succs) {
      std::map<unsigned, unsigned>::iterator I =
        NodeNum2Index.find(Succ.getSUnit()->NodeNum);
      if (I != NodeNum2Index.end())
        HasLowLatencyNonWaitedParent[I->second] = 1;
    }
  }
  SU->isScheduled = true;
}

void SIScheduleBlock::finalizeUnits() {
  // We remove links from outside blocks to enable scheduling inside the
  // block.
  for (SUnit* SU : SUnits) {
    releaseSuccessors(SU, false);
    if (DAG->IsHighLatencySU[SU->NodeNum])
      HighLatencyBlock = true;
  }
  HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
}

void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
  unsigned PredID = Pred->getID();

  // Check if not already predecessor.
  for (SIScheduleBlock* P : Preds) {
    if (PredID == P->getID())
      return;
  }
  Preds.push_back(Pred);

  assert(none_of(Succs,
                 [=](std::pair<SIScheduleBlock*,
                     SIScheduleBlockLinkKind> S) {
                   return PredID == S.first->getID();
                 }) &&
         "Loop in the Block Graph!");
}

void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
                              SIScheduleBlockLinkKind Kind) {
  unsigned SuccID = Succ->getID();

  // Check if not already successor.
  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
    if (SuccID == S.first->getID()) {
      if (S.second == SIScheduleBlockLinkKind::NoData &&
          Kind == SIScheduleBlockLinkKind::Data)
        S.second = Kind;
      return;
    }
  }
  if (Succ->isHighLatencyBlock())
    ++NumHighLatencySuccessors;
  Succs.emplace_back(Succ, Kind);

  assert(none_of(Preds,
                 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
         "Loop in the Block Graph!");
}

#ifndef NDEBUG
void SIScheduleBlock::printDebug(bool full) {
  dbgs() << "Block (" << ID << ")\n";
  if (!full)
    return;

  dbgs() << "\nContains High Latency Instruction: "
         << HighLatencyBlock << '\n';
  dbgs() << "\nDepends On:\n";
  for (SIScheduleBlock* P : Preds) {
    P->printDebug(false);
  }

  dbgs() << "\nSuccessors:\n";
  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
    if (S.second == SIScheduleBlockLinkKind::Data)
      dbgs() << "(Data Dep) ";
    S.first->printDebug(false);
  }

  if (Scheduled) {
    dbgs() << "LiveInPressure "
           << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
           << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
    dbgs() << "LiveOutPressure "
           << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
           << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
    dbgs() << "LiveIns:\n";
    for (unsigned Reg : LiveInRegs)
      dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';

    dbgs() << "\nLiveOuts:\n";
    for (unsigned Reg : LiveOutRegs)
      dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
  }

  dbgs() << "\nInstructions:\n";
  for (const SUnit* SU : SUnits)
    DAG->dumpNode(*SU);

  dbgs() << "///////////////////////\n";
}
#endif



SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
    : DAG(DAG) {}

SIScheduleBlocks
SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
  std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
      Blocks.find(BlockVariant);
  if (B == Blocks.end()) {
    SIScheduleBlocks Res;
    createBlocksForVariant(BlockVariant);
    topologicalSort();
    scheduleInsideBlocks();
    fillStats();
    Res.Blocks = CurrentBlocks;
    Res.TopDownIndex2Block = TopDownIndex2Block;
    Res.TopDownBlock2Index = TopDownBlock2Index;
    Blocks[BlockVariant] = Res;
    return Res;
  }
  return B->second;
}

bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
  if (SU->NodeNum >= DAG->SUnits.size())
    return false;
  return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
}

void SIScheduleBlockCreator::colorHighLatenciesAlone() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    if (DAG->IsHighLatencySU[SU->NodeNum]) {
      CurrentColoring[SU->NodeNum] = NextReservedID++;
    }
  }
}

static bool
hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
  for (const auto &PredDep : SU.Preds) {
    if (PredDep.getSUnit() == &FromSU &&
        PredDep.getKind() == llvm::SDep::Data)
      return true;
  }
  return false;
}
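// Group high latency instructions so that several of them can be in flight
// at the same time. The group size grows with the number of high latency
// instructions in the region: for example, with 9 of them GroupSize becomes
// 3, so the region ends up split into roughly three groups of three.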

void SIScheduleBlockCreator::colorHighLatenciesGroups() {
  unsigned DAGSize = DAG->SUnits.size();
  unsigned NumHighLatencies = 0;
  unsigned GroupSize;
  int Color = NextReservedID;
  unsigned Count = 0;
  std::set<unsigned> FormingGroup;

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    if (DAG->IsHighLatencySU[SU->NodeNum])
      ++NumHighLatencies;
  }

  if (NumHighLatencies == 0)
    return;

  if (NumHighLatencies <= 6)
    GroupSize = 2;
  else if (NumHighLatencies <= 12)
    GroupSize = 3;
  else
    GroupSize = 4;

  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    const SUnit &SU = DAG->SUnits[SUNum];
    if (DAG->IsHighLatencySU[SU.NodeNum]) {
      unsigned CompatibleGroup = true;
      int ProposedColor = Color;
      std::vector<int> AdditionalElements;

      // We do not want two high latency instructions that depend on each
      // other in the same block, and the instructions required to join them
      // must be compatible with the group as well.
      for (unsigned j : FormingGroup) {
        bool HasSubGraph;
        std::vector<int> SubGraph;
        // By construction (topological order), if SU and
        // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
        // in the parent graph of SU.
#ifndef NDEBUG
        SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
                                               HasSubGraph);
        assert(!HasSubGraph);
#endif
        SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
                                               HasSubGraph);
        if (!HasSubGraph)
          continue; // No dependencies between each other.
        if (SubGraph.size() > 5) {
          // Too many elements would be required to be added to the block.
          CompatibleGroup = false;
          break;
        }
        // Check the type of dependency.
        for (unsigned k : SubGraph) {
          // If in the path to join the two instructions
          // there is another high latency instruction,
          // or instructions colored for another block,
          // abort the merge.
          if (DAG->IsHighLatencySU[k] || (CurrentColoring[k] != ProposedColor &&
                                          CurrentColoring[k] != 0)) {
            CompatibleGroup = false;
            break;
          }
          // If one of the SUs in the subgraph has a data dependency on the
          // high latency instruction already in the group, abort too.
          if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
            CompatibleGroup = false;
            break;
          }
        }
        if (!CompatibleGroup)
          break;
        // Same check for the SU.
        if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
          CompatibleGroup = false;
          break;
        }
        // The instructions in the subgraph are required for the high latency
        // instruction to join the group: remember them.
        llvm::append_range(AdditionalElements, SubGraph);
      }
      if (CompatibleGroup) {
        FormingGroup.insert(SU.NodeNum);
        for (unsigned j : AdditionalElements)
          CurrentColoring[j] = ProposedColor;
        CurrentColoring[SU.NodeNum] = ProposedColor;
        ++Count;
      }
      // Found one incompatible instruction,
      // or has filled a big enough group.
      // -> start a new one.
      if (!CompatibleGroup) {
        FormingGroup.clear();
        Color = ++NextReservedID;
        ProposedColor = Color;
        FormingGroup.insert(SU.NodeNum);
        CurrentColoring[SU.NodeNum] = ProposedColor;
        Count = 0;
      } else if (Count == GroupSize) {
        FormingGroup.clear();
        Color = ++NextReservedID;
        ProposedColor = Color;
        Count = 0;
      }
    }
  }
}

void SIScheduleBlockCreator::colorComputeReservedDependencies() {
  unsigned DAGSize = DAG->SUnits.size();
  std::map<std::set<unsigned>, unsigned> ColorCombinations;

  CurrentTopDownReservedDependencyColoring.clear();
  CurrentBottomUpReservedDependencyColoring.clear();

  CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
  CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);

  // Traverse TopDown, and give different colors to SUs depending
  // on which combination of high latencies they depend on.

  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given.
    if (CurrentColoring[SU->NodeNum]) {
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
        continue;
      if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
        SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
    }
    // Color 0 by default.
    if (SUColors.empty())
      continue;
    // Same color as the parent.
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }

  ColorCombinations.clear();

  // Same as before, but BottomUp.

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given.
    if (CurrentColoring[SU->NodeNum]) {
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
        SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
    }
    // Keep color 0.
    if (SUColors.empty())
      continue;
    // Same color as the child.
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }
}

void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
  std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;

  // Every combination of the top down and bottom up reserved dependency
  // colorings gets its own color.

  for (SUnit &SU : DAG->SUnits) {
    std::pair<unsigned, unsigned> SUColors;

    // High latency instructions: already given.
    if (CurrentColoring[SU.NodeNum])
      continue;

    SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];
    SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];

    auto [Pos, Inserted] =
        ColorCombinations.try_emplace(SUColors, NextNonReservedID);
    CurrentColoring[SU.NodeNum] = Pos->second;
    if (Inserted)
      NextNonReservedID++;
  }
}

void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
  unsigned DAGSize = DAG->SUnits.size();
  std::vector<int> PendingColoring = CurrentColoring;

  assert(DAGSize >= 1 &&
         CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
         CurrentTopDownReservedDependencyColoring.size() == DAGSize);
  // If there is no reserved block at all, do nothing. We do not want
  // everything in one block.
  if (*llvm::max_element(CurrentBottomUpReservedDependencyColoring) == 0 &&
      *llvm::max_element(CurrentTopDownReservedDependencyColoring) == 0)
    return;

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;
    std::set<unsigned> SUColorsPending;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
          CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
        SUColors.insert(CurrentColoring[Succ->NodeNum]);
      SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
    }
    // If there is only one child/parent block, and that block
    // is not among the ones being changed in this pass, then
    // merge the instruction into that block.
    if (SUColors.size() == 1 && SUColorsPending.size() == 1)
      PendingColoring[SU->NodeNum] = *SUColors.begin();
    else // TODO: Attribute new colors depending on the pending colors.
      PendingColoring[SU->NodeNum] = NextNonReservedID++;
  }
  CurrentColoring = PendingColoring;
}


void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
  unsigned DAGSize = DAG->SUnits.size();
  unsigned PreviousColor;
  std::set<unsigned> SeenColors;

  if (DAGSize <= 1)
    return;

  PreviousColor = CurrentColoring[0];

  for (unsigned i = 1, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    unsigned CurrentColor = CurrentColoring[i];
    unsigned PreviousColorSave = PreviousColor;

    if (CurrentColor != PreviousColor)
      SeenColors.insert(PreviousColor);
    PreviousColor = CurrentColor;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    if (SeenColors.find(CurrentColor) == SeenColors.end())
      continue;

    if (PreviousColorSave != CurrentColor)
      CurrentColoring[i] = NextNonReservedID++;
    else
      CurrentColoring[i] = CurrentColoring[i-1];
  }
}

void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    // No predecessor: Vgpr constant loading.
    // Low latency instructions usually have a predecessor (the address).
    if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1)
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
  }
}

void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1)
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
  }
}

void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
  }
}

void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
  unsigned DAGSize = DAG->SUnits.size();
  std::map<unsigned, unsigned> ColorCount;

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    unsigned color = CurrentColoring[SU->NodeNum];
    ++ColorCount[color];
  }

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    unsigned color = CurrentColoring[SU->NodeNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    if (ColorCount[color] > 1)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1 && *SUColors.begin() != color) {
      --ColorCount[color];
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
      ++ColorCount[*SUColors.begin()];
    }
  }
}

void SIScheduleBlockCreator::cutHugeBlocks() {
  // TODO
}

void SIScheduleBlockCreator::regroupNoUserInstructions() {
  unsigned DAGSize = DAG->SUnits.size();
  int GroupID = NextNonReservedID++;

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    bool hasSuccessor = false;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      hasSuccessor = true;
    }
    if (!hasSuccessor)
      CurrentColoring[SU->NodeNum] = GroupID;
  }
}

void SIScheduleBlockCreator::colorExports() {
  unsigned ExportColor = NextNonReservedID++;
  SmallVector<unsigned, 8> ExpGroup;

  // Put all exports together in a block.
  // The block will naturally end up being scheduled last,
  // thus putting exports at the end of the schedule, which
  // is better for performance.
  // However we must ensure, for safety, that the exports can be put
  // together in the same block without any other instruction.
  // If a non-export instruction depends on an export, the exports
  // are not regrouped.
  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    const SUnit &SU = DAG->SUnits[SUNum];
    if (SIInstrInfo::isEXP(*SU.getInstr())) {
      // SU is an export instruction. Check whether one of its successor
      // dependencies is a non-export, in which case we skip export grouping.
      for (const SDep &SuccDep : SU.Succs) {
        const SUnit *SuccSU = SuccDep.getSUnit();
        if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) {
          // Ignore these dependencies.
          continue;
        }
        assert(SuccSU->isInstr() &&
               "SUnit unexpectedly not representing an instruction!");

        if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) {
          // A non-export depends on us. Skip export grouping.
          return;
        }
      }
      ExpGroup.push_back(SUNum);
    }
  }

  // The group can be formed. Put all exports into the group.
  for (unsigned j : ExpGroup)
    CurrentColoring[j] = ExportColor;
}
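// Block creation works by assigning a "color" to every SU and then turning
// each color into one SIScheduleBlock. Colors 1..DAGSize are "reserved"
// (the high latency groups created above); NextNonReservedID starts at
// DAGSize + 1, which is why the merging passes above only touch SUs whose
// color is strictly greater than DAGSize.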

void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
  unsigned DAGSize = DAG->SUnits.size();
  std::map<unsigned,unsigned> RealID;

  CurrentBlocks.clear();
  CurrentColoring.clear();
  CurrentColoring.resize(DAGSize, 0);
  Node2CurrentBlock.clear();

  // Restore links a previous scheduling variant has overridden.
  DAG->restoreSULinksLeft();

  NextReservedID = 1;
  NextNonReservedID = DAGSize + 1;

  if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
    colorHighLatenciesGroups();
  else
    colorHighLatenciesAlone();
  colorComputeReservedDependencies();
  colorAccordingToReservedDependencies();
  colorEndsAccordingToDependencies();
  if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
    colorForceConsecutiveOrderInGroup();
  regroupNoUserInstructions();
  colorMergeConstantLoadsNextGroup();
  colorMergeIfPossibleNextGroupOnlyForReserved();
  colorExports();

  // Put SUs of the same color into the same block.
  Node2CurrentBlock.resize(DAGSize, -1);
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    unsigned Color = CurrentColoring[SU->NodeNum];
    if (RealID.find(Color) == RealID.end()) {
      int ID = CurrentBlocks.size();
      BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
      CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
      RealID[Color] = ID;
    }
    CurrentBlocks[RealID[Color]]->addUnit(SU);
    Node2CurrentBlock[SU->NodeNum] = RealID[Color];
  }

  // Build dependencies between blocks.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    int SUID = Node2CurrentBlock[i];
    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (Node2CurrentBlock[Succ->NodeNum] != SUID)
        CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
                                     SuccDep.isCtrl() ? NoData : Data);
    }
    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
        continue;
      if (Node2CurrentBlock[Pred->NodeNum] != SUID)
        CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
    }
  }

  // Free roots and leafs of all blocks to enable scheduling inside them.
  for (SIScheduleBlock *Block : CurrentBlocks)
    Block->finalizeUnits();
  LLVM_DEBUG(dbgs() << "Blocks created:\n\n";
             for (SIScheduleBlock *Block : CurrentBlocks) {
               Block->printDebug(true);
             });
}



// If this iterator is a debug value, increment until reaching the End or a
// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugInstr())
      break;
  }
  return I;
}

void SIScheduleBlockCreator::topologicalSort() {
  unsigned DAGSize = CurrentBlocks.size();
  std::vector<int> WorkList;

  WorkList.reserve(DAGSize);
  TopDownIndex2Block.resize(DAGSize);
  TopDownBlock2Index.resize(DAGSize);
  BottomUpIndex2Block.resize(DAGSize);

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    unsigned Degree = Block->getSuccs().size();
    TopDownBlock2Index[i] = Degree;
    if (Degree == 0) {
      WorkList.push_back(i);
    }
  }

  int Id = DAGSize;
  while (!WorkList.empty()) {
    int i = WorkList.back();
    SIScheduleBlock *Block = CurrentBlocks[i];
    WorkList.pop_back();
    TopDownBlock2Index[i] = --Id;
    TopDownIndex2Block[Id] = i;
    for (SIScheduleBlock* Pred : Block->getPreds()) {
      if (!--TopDownBlock2Index[Pred->getID()])
        WorkList.push_back(Pred->getID());
    }
  }

#ifndef NDEBUG
  // Check correctness of the ordering.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    for (SIScheduleBlock* Pred : Block->getPreds()) {
      assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
             "Wrong Top Down topological sorting");
    }
  }
#endif

  BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
                                         TopDownIndex2Block.rend());
}

void SIScheduleBlockCreator::scheduleInsideBlocks() {
  unsigned DAGSize = CurrentBlocks.size();

  // First do a fast schedule so that every block corresponds to a
  // contiguous range of instructions and its register liveness is known.
  LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    Block->fastSchedule();
  }

  // Note: the following code, and the part restoring the previous position,
  // is by far the most expensive operation of the scheduler.

  // Do not update CurrentTop.
  MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
  std::vector<MachineBasicBlock::iterator> PosOld;
  std::vector<MachineBasicBlock::iterator> PosNew;
  PosOld.reserve(DAG->SUnits.size());
  PosNew.reserve(DAG->SUnits.size());

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    int BlockIndice = TopDownIndex2Block[i];
    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
    std::vector<SUnit*> SUs = Block->getScheduledUnits();

    for (SUnit* SU : SUs) {
      MachineInstr *MI = SU->getInstr();
      MachineBasicBlock::iterator Pos = MI;
      PosOld.push_back(Pos);
      if (&*CurrentTopFastSched == MI) {
        PosNew.push_back(Pos);
        CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
                                          DAG->getCurrentBottom());
      } else {
        // Update the instruction stream.
        DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);

        // Update LiveIntervals.
        // Note: Moving all instructions and calling handleMove every time
        // is the most cpu intensive operation of the scheduler.
        DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
        PosNew.push_back(CurrentTopFastSched);
      }
    }
  }

  // Now we have Block of SUs == Block of MI.
  // We do the final schedule for the instructions inside the block.
  // The property that all the SUs of the Block are grouped together as MI
  // is used for correct reg usage tracking.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    std::vector<SUnit*> SUs = Block->getScheduledUnits();
    Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
  }

  // Restore the original instruction order.
  for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
    MachineBasicBlock::iterator POld = PosOld[i-1];
    MachineBasicBlock::iterator PNew = PosNew[i-1];
    if (PNew != POld) {
      // Update the instruction stream.
      DAG->getBB()->splice(POld, DAG->getBB(), PNew);

      // Update LiveIntervals.
      DAG->getLIS()->handleMove(*PNew, /*UpdateFlags=*/true);
    }
  }

  LLVM_DEBUG(
    for (SIScheduleBlock *Block : CurrentBlocks) {
      Block->printDebug(true);
    });
}

void SIScheduleBlockCreator::fillStats() {
  unsigned DAGSize = CurrentBlocks.size();

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    int BlockIndice = TopDownIndex2Block[i];
    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
    if (Block->getPreds().empty())
      Block->Depth = 0;
    else {
      unsigned Depth = 0;
      for (SIScheduleBlock *Pred : Block->getPreds()) {
        if (Depth < Pred->Depth + Pred->getCost())
          Depth = Pred->Depth + Pred->getCost();
      }
      Block->Depth = Depth;
    }
  }

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    int BlockIndice = BottomUpIndex2Block[i];
    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
    if (Block->getSuccs().empty())
      Block->Height = 0;
    else {
      unsigned Height = 0;
      for (const auto &Succ : Block->getSuccs())
        Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
      Block->Height = Height;
    }
  }
}



SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
                                                   SISchedulerBlockSchedulerVariant Variant,
                                                   SIScheduleBlocks  BlocksStruct) :
  DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
  LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
  SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {

  // Count, for every register a block produces, how many of its successor
  // blocks consume it (LiveOutRegsNumUsages).
  LiveOutRegsNumUsages.resize(Blocks.size());
  for (SIScheduleBlock *Block : Blocks) {
    for (unsigned Reg : Block->getInRegs()) {
      bool Found = false;
      int topoInd = -1;
      for (SIScheduleBlock* Pred : Block->getPreds()) {
        std::set<unsigned> PredOutRegs = Pred->getOutRegs();
        std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

        if (RegPos != PredOutRegs.end()) {
          Found = true;
          if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
            topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
          }
        }
      }

      if (!Found)
        continue;

      int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
      ++LiveOutRegsNumUsages[PredID][Reg];
    }
  }

  LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
  BlockNumPredsLeft.resize(Blocks.size());
  BlockNumSuccsLeft.resize(Blocks.size());

  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    BlockNumPredsLeft[i] = Block->getPreds().size();
    BlockNumSuccsLeft[i] = Block->getSuccs().size();
  }

#ifndef NDEBUG
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    assert(Block->getID() == i);
  }
#endif

  std::set<unsigned> InRegs = DAG->getInRegs();
  addLiveRegs(InRegs);

  // Increase LiveOutRegsNumUsages for blocks
  // producing registers consumed in another
  // scheduling region.
  for (unsigned Reg : DAG->getOutRegs()) {
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      // Do reverse traversal.
      int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
      SIScheduleBlock *Block = Blocks[ID];
      const std::set<unsigned> &OutRegs = Block->getOutRegs();

      if (OutRegs.find(Reg) == OutRegs.end())
        continue;

      ++LiveOutRegsNumUsages[ID][Reg];
      break;
    }
  }

  // Fill LiveRegsConsumers for registers that were already
  // defined before this scheduling region.
  for (SIScheduleBlock *Block : Blocks) {
    for (unsigned Reg : Block->getInRegs()) {
      bool Found = false;
      for (SIScheduleBlock* Pred : Block->getPreds()) {
        std::set<unsigned> PredOutRegs = Pred->getOutRegs();
        std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

        if (RegPos != PredOutRegs.end()) {
          Found = true;
          break;
        }
      }

      if (!Found)
        ++LiveRegsConsumers[Reg];
    }
  }

  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    if (BlockNumPredsLeft[i] == 0) {
      ReadyBlocks.push_back(Block);
    }
  }

  while (SIScheduleBlock *Block = pickBlock()) {
    BlocksScheduled.push_back(Block);
    blockScheduled(Block);
  }

  LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Block
                                            : BlocksScheduled) {
    dbgs() << ' ' << Block->getID();
  } dbgs() << '\n';);
}

bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
                                                   SIBlockSchedCandidate &TryCand) {
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Try to hide high latencies.
  if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
                       Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
    return true;
  // Schedule high latencies early so they can be hidden.
  if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
                          TryCand, Cand, Latency))
    return true;
  if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
                                                   TryCand, Cand, Depth))
    return true;
  if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
                          Cand.NumHighLatencySuccessors,
                          TryCand, Cand, Successor))
    return true;
  return false;
}

bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
                                                    SIBlockSchedCandidate &TryCand) {
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
                       TryCand, Cand, RegUsage))
    return true;
  if (SISched::tryGreater(TryCand.NumSuccessors > 0,
                          Cand.NumSuccessors > 0,
                          TryCand, Cand, Successor))
    return true;
  if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Latency))
    return true;
  if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
                       TryCand, Cand, RegUsage))
    return true;
  return false;
}

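// Block picking policy: when VGPR pressure is already high, or for the
// register-usage oriented variants, the register-usage criteria above are
// evaluated first and latency only breaks the remaining ties; otherwise the
// latency criteria win and register usage is the tie breaker (see pickBlock).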
SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
  SIBlockSchedCandidate Cand;
  std::vector<SIScheduleBlock*>::iterator Best;
  SIScheduleBlock *Block;
  if (ReadyBlocks.empty())
    return nullptr;

  DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
                        VregCurrentUsage, SregCurrentUsage);
  if (VregCurrentUsage > maxVregUsage)
    maxVregUsage = VregCurrentUsage;
  if (SregCurrentUsage > maxSregUsage)
    maxSregUsage = SregCurrentUsage;
  LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";
             for (SIScheduleBlock *Block
                  : ReadyBlocks) dbgs()
             << Block->getID() << ' ';
             dbgs() << "\nCurrent Live:\n";
             for (unsigned Reg
                  : LiveRegs) dbgs()
             << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
             dbgs() << '\n';
             dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
             dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';);

  Cand.Block = nullptr;
  for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
       E = ReadyBlocks.end(); I != E; ++I) {
    SIBlockSchedCandidate TryCand;
    TryCand.Block = *I;
    TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
    TryCand.VGPRUsageDiff =
      checkRegUsageImpact(TryCand.Block->getInRegs(),
          TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
    TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
    TryCand.NumHighLatencySuccessors =
      TryCand.Block->getNumHighLatencySuccessors();
    TryCand.LastPosHighLatParentScheduled =
      (unsigned int) std::max (0,
         LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
           LastPosWaitedHighLatency);
    TryCand.Height = TryCand.Block->Height;
    // Try not to increase VGPR usage too much, else we may spill.
    if (VregCurrentUsage > 120 ||
        Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
      if (!tryCandidateRegUsage(Cand, TryCand) &&
          Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
        tryCandidateLatency(Cand, TryCand);
    } else {
      if (!tryCandidateLatency(Cand, TryCand))
        tryCandidateRegUsage(Cand, TryCand);
    }
    if (TryCand.Reason != NoCand) {
      Cand.setBest(TryCand);
      Best = I;
      LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
                        << getReasonStr(Cand.Reason) << '\n');
    }
  }

  LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';
             dbgs() << "Is a block with high latency instruction: "
                    << (Cand.IsHighLatency ? "yes\n" : "no\n");
             dbgs() << "Position of last high latency dependency: "
                    << Cand.LastPosHighLatParentScheduled << '\n';
             dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
             dbgs() << '\n';);

  Block = Cand.Block;
  ReadyBlocks.erase(Best);
  return Block;
}

// Tracking of currently alive registers to determine VGPR usage.

void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
  for (Register Reg : Regs) {
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    // If not already in the live set, then add it.
    (void) LiveRegs.insert(Reg);
  }
}

void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
                                                std::set<unsigned> &Regs) {
  for (unsigned Reg : Regs) {
    // For now only track virtual registers.
    std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
    assert (Pos != LiveRegs.end() && // Reg must be live.
               LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
               LiveRegsConsumers[Reg] >= 1);
    --LiveRegsConsumers[Reg];
    if (LiveRegsConsumers[Reg] == 0)
      LiveRegs.erase(Pos);
  }
}


void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
  for (const auto &Block : Parent->getSuccs()) {
    if (--BlockNumPredsLeft[Block.first->getID()] == 0)
      ReadyBlocks.push_back(Block.first);

    if (Parent->isHighLatencyBlock() &&
        Block.second == SIScheduleBlockLinkKind::Data)
      LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
  }
}
1671
1673 decreaseLiveRegs(Block, Block->getInRegs());
1674 addLiveRegs(Block->getOutRegs());
1675 releaseBlockSuccs(Block);
1676 for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {
1677
1678 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1679 LiveRegsConsumers[RegP.first] == 0);
1680 LiveRegsConsumers[RegP.first] += RegP.second;
1681 }
1682 if (LastPosHighLatencyParentScheduled[Block->getID()] >
1683 (unsigned)LastPosWaitedHighLatency)
1684 LastPosWaitedHighLatency =
1685 LastPosHighLatencyParentScheduled[Block->getID()];
1686 ++NumBlockScheduled;
1687}
1688
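// checkRegUsageImpact estimates how scheduling a block would change each
// register pressure set: an input register consumed for the last time frees
// its weight, and each newly produced live-out adds its weight. Only the
// VGPR_32 entry of the returned vector is currently consulted (see
// pickBlock).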
std::vector<int>
SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
                                              std::set<unsigned> &OutRegs) {
  std::vector<int> DiffSetPressure;
  DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);

  for (Register Reg : InRegs) {
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    if (LiveRegsConsumers[Reg] > 1)
      continue;
    PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
    for (; PSetI.isValid(); ++PSetI) {
      DiffSetPressure[*PSetI] -= PSetI.getWeight();
    }
  }

  for (Register Reg : OutRegs) {
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
    for (; PSetI.isValid(); ++PSetI) {
      DiffSetPressure[*PSetI] += PSetI.getWeight();
    }
  }

  return DiffSetPressure;
}


struct SIScheduleBlockResult
SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
                             SISchedulerBlockSchedulerVariant ScheduleVariant) {
  SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
  SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
  std::vector<SIScheduleBlock*> ScheduledBlocks;
  struct SIScheduleBlockResult Res;

  ScheduledBlocks = Scheduler.getBlocks();

  for (SIScheduleBlock *Block : ScheduledBlocks) {
    std::vector<SUnit*> SUs = Block->getScheduledUnits();

    for (SUnit* SU : SUs)
      Res.SUs.push_back(SU->NodeNum);
  }

  Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
  Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
  return Res;
}


SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
  ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
  SITII = static_cast<const SIInstrInfo*>(TII);
  SITRI = static_cast<const SIRegisterInfo*>(TRI);
}

SIScheduleDAGMI::~SIScheduleDAGMI() = default;

// Does a topological sort over the SUs, both top-down and bottom-up.
void SIScheduleDAGMI::topologicalSort() {
  Topo.InitDAGTopologicalSorting();

  TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
  BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
}

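// After the block-level schedule has been flattened into ScheduledSUnits,
// low latency instructions (and the COPYs that feed them) are hoisted as
// early as their dependencies allow, so that their latency overlaps with
// independent work; see moveLowLatencies() below.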

void SIScheduleDAGMI::moveLowLatencies() {
  unsigned DAGSize = SUnits.size();
  int LastLowLatencyUser = -1;
  int LastLowLatencyPos = -1;

  for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[ScheduledSUnits[i]];
    bool IsLowLatencyUser = false;
    unsigned MinPos = 0;

    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
        IsLowLatencyUser = true;
      }
      if (Pred->NodeNum >= DAGSize)
        continue;
      unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
      if (PredPos >= MinPos)
        MinPos = PredPos + 1;
    }

    if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
      unsigned BestPos = LastLowLatencyUser + 1;
      if ((int)BestPos <= LastLowLatencyPos)
        BestPos = LastLowLatencyPos + 1;
      if (BestPos < MinPos)
        BestPos = MinPos;
      if (BestPos < i) {
        for (unsigned u = i; u > BestPos; --u) {
          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
          ScheduledSUnits[u] = ScheduledSUnits[u-1];
        }
        ScheduledSUnits[BestPos] = SU->NodeNum;
        ScheduledSUnitsInv[SU->NodeNum] = BestPos;
      }
      LastLowLatencyPos = BestPos;
      if (IsLowLatencyUser)
        LastLowLatencyUser = BestPos;
    } else if (IsLowLatencyUser) {
      LastLowLatencyUser = i;
    // Moves COPY instructions on which the low latency
    // instructions depend too.
    } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
      bool CopyForLowLat = false;
      for (SDep& SuccDep : SU->Succs) {
        SUnit *Succ = SuccDep.getSUnit();
        if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
          continue;
        if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
          CopyForLowLat = true;
        }
      }
      if (!CopyForLowLat)
        continue;
      if (MinPos < i) {
        for (unsigned u = i; u > MinPos; --u) {
          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
          ScheduledSUnits[u] = ScheduledSUnits[u-1];
        }
        ScheduledSUnits[MinPos] = SU->NodeNum;
        ScheduledSUnitsInv[SU->NodeNum] = MinPos;
      }
    }
  }
}

void SIScheduleDAGMI::restoreSULinksLeft() {
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnits[i].isScheduled = false;
    SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
    SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
    SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
    SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
  }
}

// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
template<typename _Iterator> void
SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
                                  unsigned &VgprUsage, unsigned &SgprUsage) {
  VgprUsage = 0;
  SgprUsage = 0;
  for (_Iterator RegI = First; RegI != End; ++RegI) {
    Register Reg = *RegI;
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    PSetIterator PSetI = MRI.getPressureSets(Reg);
    for (; PSetI.isValid(); ++PSetI) {
      if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
        VgprUsage += PSetI.getWeight();
      else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
        SgprUsage += PSetI.getWeight();
    }
  }
}
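// Entry point of the scheduler for one region. The region is first scheduled
// with the default latency-oriented variant; if the resulting MaxVGPRUsage is
// judged too high, other block-creation / block-scheduling variant
// combinations are tried and the result with the lowest VGPR usage is kept,
// before the chosen instruction order is committed with scheduleMI().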

void SIScheduleDAGMI::schedule()
{
  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  SIScheduleBlockResult Best, Temp;

  buildDAGWithRegPressure();
  postProcessDAG();

  LLVM_DEBUG(dump());
  if (PrintDAGs)
    dump();
  if (ViewMISchedDAGs)
    viewGraph();

  topologicalSort();
  findRootsAndBiasEdges(TopRoots, BotRoots);
  // We reuse several ScheduleDAGMI and ScheduleDAGMILive
  // functions, but to make them happy we must initialize
  // the default Scheduler implementation (even if we do not
  // run it).
  SchedImpl->initialize(this);
  initQueues(TopRoots, BotRoots);

  // Fill some stats to help scheduling.

  SUnitsLinksBackup = SUnits;
  IsLowLatencySU.clear();
  LowLatencyOffset.clear();
  IsHighLatencySU.clear();

  IsLowLatencySU.resize(SUnits.size(), 0);
  LowLatencyOffset.resize(SUnits.size(), 0);
  IsHighLatencySU.resize(SUnits.size(), 0);

  for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    const MachineOperand *BaseLatOp;
    int64_t OffLatReg;
    if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
      IsLowLatencySU[i] = 1;
      bool OffsetIsScalable;
      if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
                                         OffsetIsScalable, TRI))
        LowLatencyOffset[i] = OffLatReg;
    } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
      IsHighLatencySU[i] = 1;
  }

  SIScheduler Scheduler(this);
  Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
                                   SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);

  // If VGPR usage is extremely high, try other good performing variants
  // which could lead to lower VGPR usage.
  if (Best.MaxVGPRUsage > 180) {
    static const std::pair<SISchedulerBlockCreatorVariant,
                           SISchedulerBlockSchedulerVariant>
        Variants[] = {
      { LatenciesAlone, BlockRegUsageLatency },
      { LatenciesGrouped, BlockLatencyRegUsage },
      { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
    };
    for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
      Temp = Scheduler.scheduleVariant(v.first, v.second);
      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
        Best = Temp;
    }
  }
  // If VGPR usage is still extremely high, we may spill. Try other variants
  // which are less performing, but that could lead to lower VGPR usage.
  if (Best.MaxVGPRUsage > 200) {
    static const std::pair<SISchedulerBlockCreatorVariant,
                           SISchedulerBlockSchedulerVariant>
        Variants[] = {
      { LatenciesAlone, BlockRegUsage },
      { LatenciesGrouped, BlockRegUsageLatency },
      { LatenciesGrouped, BlockRegUsage },
      { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
      { LatenciesAlonePlusConsecutive, BlockRegUsage },
    };
    for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
      Temp = Scheduler.scheduleVariant(v.first, v.second);
      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
        Best = Temp;
    }
  }

  ScheduledSUnits = Best.SUs;
  ScheduledSUnitsInv.resize(SUnits.size());

  for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
    ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
  }

  moveLowLatencies();

  // Commit the final order: move the instructions and update live intervals.

  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
  TopRPTracker.setPos(CurrentTop);

  for (unsigned I : ScheduledSUnits) {
    SUnit *SU = &SUnits[I];

    scheduleMI(SU, true);

    LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                      << *SU->getInstr());
  }

  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}