LLVM: lib/CodeGen/ModuloSchedule.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28
32}
33
34
35
36
37
38
39
41 unsigned &InitVal, unsigned &LoopVal) {
42 assert(Phi.isPHI() && "Expecting a Phi.");
43
44 InitVal = 0;
45 LoopVal = 0;
46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() != Loop)
48 InitVal = Phi.getOperand(i).getReg();
49 else
50 LoopVal = Phi.getOperand(i).getReg();
51
52 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
53}
54
55
57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
60 return 0;
61}
62
63
65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
68 return 0;
69}
70
74 if (Preheader == BB)
75 Preheader = *std::next(BB->pred_begin());
76
77
78
83 unsigned MaxDiff = 0;
84 bool PhiIsSwapped = false;
88 unsigned Diff = 0;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
91 if (MI->isPHI()) {
92 if (isLoopCarried(*MI))
93 ++Diff;
94 else
95 PhiIsSwapped = true;
96 }
97 MaxDiff = std::max(Diff, MaxDiff);
98 }
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
100 }
101 }
102
103 generatePipelinedLoop();
104}
105
106void ModuloScheduleExpander::generatePipelinedLoop() {
109
110
112
113 unsigned MaxStageCount = Schedule.getNumStages() - 1;
114
115
116
117
118
119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120
121
122
123
124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125
126 InstrMapTy InstrMap;
127
129
130
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
134
135
136
138 if (CI->isPHI())
139 continue;
140 unsigned StageNum = Schedule.getStage(CI);
141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
144 InstrMap[NewMI] = CI;
145 }
146
147
148
151 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
153 InstrMap[NewMI] = &MI;
154 }
155
156 NewKernel = KernelBB;
159
160 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
161 InstrMap, MaxStageCount, MaxStageCount, false);
162 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163 InstrMap, MaxStageCount, MaxStageCount, false);
164
166
168
169 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
170 PrologBBs);
171
172
173
174 splitLifetimes(KernelBB, EpilogBBs);
175
176
177 removeDeadInstructions(KernelBB, EpilogBBs);
178
179
180 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
181
182 delete[] VRMap;
183 delete[] VRMapPhi;
184}
185
187
188 for (auto &I : *BB)
190 BB->clear();
191 BB->eraseFromParent();
192}
193
194
195void ModuloScheduleExpander::generateProlog(unsigned LastStage,
197 ValueMapTy *VRMap,
198 MBBVectorTy &PrologBBs) {
200 InstrMapTy InstrMap;
201
202
203
204
205 for (unsigned i = 0; i < LastStage; ++i) {
206
207
213 PredBB = NewBB;
215
216
217
218 for (int StageNum = i; StageNum >= 0; --StageNum) {
221 BBI != BBE; ++BBI) {
222 if (Schedule.getStage(&*BBI) == StageNum) {
223 if (BBI->isPHI())
224 continue;
226 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
229 InstrMap[NewMI] = &*BBI;
230 }
231 }
232 }
233 rewritePhiValues(NewBB, i, VRMap, InstrMap);
235 dbgs() << "prolog:\n";
236 NewBB->dump();
237 });
238 }
239
241
242
243
244 unsigned numBranches = TII->removeBranch(*Preheader);
245 if (numBranches) {
248 }
249}
250
251
252
253
254void ModuloScheduleExpander::generateEpilog(
256 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257 MBBVectorTy &PrologBBs) {
258
259
263 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
264 if (checkBranch)
265 return;
266
268 if (*LoopExitI == KernelBB)
269 ++LoopExitI;
270 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
272
275 InstrMapTy InstrMap;
276
277
278
279
280 int EpilogStage = LastStage + 1;
281 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
285
289
290 if (EpilogStart == LoopExitBB)
291 EpilogStart = NewBB;
292
293
294
295 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
296 for (auto &BBI : *BB) {
297 if (BBI.isPHI())
298 continue;
300 if ((unsigned)Schedule.getStage(In) == StageNum) {
301
302
303 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
304 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
306 InstrMap[NewMI] = In;
307 }
308 }
309 }
310 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
311 InstrMap, LastStage, EpilogStage, i == 1);
312 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313 InstrMap, LastStage, EpilogStage, i == 1);
314 PredBB = NewBB;
315
317 dbgs() << "epilog:\n";
318 NewBB->dump();
319 });
320 }
321
322
324
325
326
328 assert((OrigBB == TBB || OrigBB == FBB) &&
329 "Unable to determine looping branch direction");
330 if (OrigBB != TBB)
332 else
334
335 if (EpilogBBs.size() > 0) {
339 }
340}
341
342
343
350 if (O.getParent()->getParent() != MBB)
351 O.setReg(ToReg);
354}
355
356
357
361 if (MO.getParent()->getParent() != BB)
362 return true;
363 return false;
364}
365
366
367
368
369void ModuloScheduleExpander::generateExistingPhis(
371 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
372 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
373
374
375
376 unsigned PrologStage = 0;
377 unsigned PrevStage = 0;
378 bool InKernel = (LastStageNum == CurStageNum);
379 if (InKernel) {
380 PrologStage = LastStageNum - 1;
381 PrevStage = CurStageNum;
382 } else {
383 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
384 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
385 }
386
388 BBE = BB->getFirstNonPHI();
389 BBI != BBE; ++BBI) {
390 Register Def = BBI->getOperand(0).getReg();
391
392 unsigned InitVal = 0;
393 unsigned LoopVal = 0;
394 getPhiRegs(*BBI, BB, InitVal, LoopVal);
395
396 unsigned PhiOp1 = 0;
397
398
399 unsigned PhiOp2 = LoopVal;
400 if (VRMap[LastStageNum].count(LoopVal))
401 PhiOp2 = VRMap[LastStageNum][LoopVal];
402
403 int StageScheduled = Schedule.getStage(&*BBI);
405 unsigned NumStages = getStagesForReg(Def, CurStageNum);
406 if (NumStages == 0) {
407
408
409 unsigned NewReg = VRMap[PrevStage][LoopVal];
410 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
411 InitVal, NewReg);
412 if (VRMap[CurStageNum].count(LoopVal))
413 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
414 }
415
416
417
418
419 unsigned MaxPhis = PrologStage + 2;
420 if (!InKernel && (int)PrologStage <= LoopValStage)
421 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
422 unsigned NumPhis = std::min(NumStages, MaxPhis);
423
424 unsigned NewReg = 0;
425 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
426
427
428
429
430
431 int StageDiff = 0;
432 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
433 NumPhis == 1)
434 StageDiff = 1;
435
436
437 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
438 StageDiff = StageScheduled - LoopValStage;
439 for (unsigned np = 0; np < NumPhis; ++np) {
440
441
442
443 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
444 PhiOp1 = InitVal;
445
446 else if (PrologStage >= AccessStage + StageDiff + np &&
447 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
448 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
449
450
451 else if (PrologStage >= AccessStage + StageDiff + np) {
452
453
454 PhiOp1 = LoopVal;
456 int Indirects = 1;
457 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
458 int PhiStage = Schedule.getStage(InstOp1);
459 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
461 else
464 int PhiOpStage = Schedule.getStage(InstOp1);
465 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
466 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
467 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
468 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
469 break;
470 }
471 ++Indirects;
472 }
473 } else
474 PhiOp1 = InitVal;
475
476
478 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
480
482 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
483
484
485
486 if (!InKernel) {
487 int StageDiffAdj = 0;
488 if (LoopValStage != -1 && StageScheduled > LoopValStage)
489 StageDiffAdj = StageScheduled - LoopValStage;
490
491
492 if (np == 0 && PrevStage == LastStageNum &&
493 (StageScheduled != 0 || LoopValStage != 0) &&
494 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
495 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
496
497
498 else if (np > 0 && PrevStage == LastStageNum &&
499 VRMap[PrevStage - np + 1].count(Def))
500 PhiOp2 = VRMap[PrevStage - np + 1][Def];
501
502 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
503 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
504 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
505
506
507 else if (VRMap[PrevStage - np].count(Def) &&
508 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
509 (LoopValStage == StageScheduled)))
510 PhiOp2 = VRMap[PrevStage - np][Def];
511 }
512
513
514
515
516
517 if (LoopDefIsPhi) {
518 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
519 int LVNumStages = getStagesForPhi(LoopVal);
520 int StageDiff = (StageScheduled - LoopValStage);
521 LVNumStages -= StageDiff;
522
523 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
524 NewReg = PhiOp2;
525 unsigned ReuseStage = CurStageNum;
526 if (isLoopCarried(*PhiInst))
527 ReuseStage -= LVNumStages;
528
529
530 if (VRMap[ReuseStage - np].count(LoopVal)) {
531 NewReg = VRMap[ReuseStage - np][LoopVal];
532
533 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
534 Def, NewReg);
535
536 VRMap[CurStageNum - np][Def] = NewReg;
537 PhiOp2 = NewReg;
538 if (VRMap[LastStageNum - np - 1].count(LoopVal))
539 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
540
541 if (IsLast && np == NumPhis - 1)
543 continue;
544 }
545 }
546 }
547 if (InKernel && StageDiff > 0 &&
548 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
549 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
550 }
551
554
557 TII->get(TargetOpcode::PHI), NewReg);
560 if (np == 0)
561 InstrMap[NewPhi] = &*BBI;
562
563
564
565
566 unsigned PrevReg = 0;
567 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
568 PrevReg = VRMap[PrevStage - np][LoopVal];
569 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
570 NewReg, PrevReg);
571
572 if (VRMap[CurStageNum - np].count(Def)) {
573 unsigned R = VRMap[CurStageNum - np][Def];
574 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
575 NewReg);
576 }
577
578
579
580
581 if (IsLast && np == NumPhis - 1)
583
584
585 if (InKernel)
586 PhiOp2 = NewReg;
587
588
589 VRMap[CurStageNum - np][Def] = NewReg;
590 }
591
592 while (NumPhis++ < NumStages) {
593 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
594 NewReg, 0);
595 }
596
597
598
599 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
601 }
602}
603
604
605
606
607void ModuloScheduleExpander::generatePhis(
609 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
610 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
611 bool IsLast) {
612
613
614 unsigned PrologStage = 0;
615 unsigned PrevStage = 0;
616 unsigned StageDiff = CurStageNum - LastStageNum;
617 bool InKernel = (StageDiff == 0);
618 if (InKernel) {
619 PrologStage = LastStageNum - 1;
620 PrevStage = CurStageNum;
621 } else {
622 PrologStage = LastStageNum - StageDiff;
623 PrevStage = LastStageNum + StageDiff - 1;
624 }
625
627 BBE = BB->instr_end();
628 BBI != BBE; ++BBI) {
629 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
632 continue;
633
634 int StageScheduled = Schedule.getStage(&*BBI);
635 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
637 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
638
639
640
641 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
643 NumPhis = 1;
644 if (!InKernel && (unsigned)StageScheduled > PrologStage)
645 continue;
646
647 unsigned PhiOp2;
648 if (InKernel) {
649 PhiOp2 = VRMap[PrevStage][Def];
651 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
653 }
654
655
656 if (NumPhis > PrologStage + 1 - StageScheduled)
657 NumPhis = PrologStage + 1 - StageScheduled;
658 for (unsigned np = 0; np < NumPhis; ++np) {
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681 unsigned PhiOp1 = VRMap[PrologStage][Def];
682 if (np <= PrologStage)
683 PhiOp1 = VRMap[PrologStage - np][Def];
684 if (!InKernel) {
685 if (PrevStage == LastStageNum && np == 0)
686 PhiOp2 = VRMap[LastStageNum][Def];
687 else
688 PhiOp2 = VRMapPhi[PrevStage - np][Def];
689 }
690
693
696 TII->get(TargetOpcode::PHI), NewReg);
699 if (np == 0)
700 InstrMap[NewPhi] = &*BBI;
701
702
703
704 if (InKernel) {
705 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
706 NewReg);
707 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
708 NewReg);
709
710 PhiOp2 = NewReg;
711 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
712 } else {
713 VRMapPhi[CurStageNum - np][Def] = NewReg;
714 if (np == NumPhis - 1)
715 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
716 NewReg);
717 }
718 if (IsLast && np == NumPhis - 1)
720 }
721 }
722 }
723}
724
725
726
727
728
729void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
730 MBBVectorTy &EpilogBBs) {
731
732
736 MI != ME;) {
737
738 if (MI->isInlineAsm()) {
739 ++MI;
740 continue;
741 }
742 bool SawStore = false;
743
744
745 if (->isSafeToMove(SawStore) &&
->isPHI()) {
746 ++MI;
747 continue;
748 }
749 bool used = true;
752
755 if (used)
756 break;
757 continue;
758 }
759 unsigned realUses = 0;
761
762
763 if (U.getParent()->getParent() != BB) {
764 realUses++;
765 used = true;
766 break;
767 }
768 }
769 if (realUses > 0)
770 break;
771 used = false;
772 }
773 if (!used) {
775 MI++->eraseFromParent();
776 continue;
777 }
778 ++MI;
779 }
780
781
783 Register reg = MI.getOperand(0).getReg();
786 MI.eraseFromParent();
787 }
788 }
789}
790
791
792
793
794
795
796
797
798
799
800
801void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
802 MBBVectorTy &EpilogBBs) {
804 for (auto &PHI : KernelBB->phis()) {
806
807
811 if (I->isPHI() && I->getParent() == KernelBB) {
812
814 if (!LCDef)
815 continue;
817 if ( || MI->getParent() != KernelBB || MI->isPHI())
818 continue;
819
820
821 unsigned SplitReg = 0;
824 if (BBJ.readsRegister(Def, nullptr)) {
825
826 if (SplitReg == 0) {
828 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
829 TII->get(TargetOpcode::COPY), SplitReg)
831 }
832 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
833 }
834 if (!SplitReg)
835 continue;
836
837 for (auto &Epilog : EpilogBBs)
839 if (I.readsRegister(Def, nullptr))
840 I.substituteRegister(Def, SplitReg, 0, *TRI);
841 break;
842 }
843 }
844 }
845}
846
847
850 if (.isPHI())
851 break;
852 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
853 if (MI.getOperand(i + 1).getMBB() == Incoming) {
854 MI.removeOperand(i + 1);
855 MI.removeOperand(i);
856 break;
857 }
858 }
859}
860
861
862
863
864void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
865 MBBVectorTy &PrologBBs,
867 MBBVectorTy &EpilogBBs,
868 ValueMapTy *VRMap) {
869 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
872
873
874
876 unsigned MaxIter = PrologBBs.size() - 1;
877 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
878
879
882
884 std::optional StaticallyGreater =
886 unsigned numAdded = 0;
887 if (!StaticallyGreater) {
890 } else if (*StaticallyGreater == false) {
892 Prolog->removeSuccessor(LastPro);
896
897 if (LastPro != LastEpi) {
898 LastEpi->clear();
900 }
901 if (LastPro == KernelBB) {
903 NewKernel = nullptr;
904 }
905 LastPro->clear();
907 } else {
910 }
914 E = Prolog->instr_rend();
915 I != E && numAdded > 0; ++I, --numAdded)
916 updateInstruction(&*I, false, j, 0, VRMap);
917 }
918
919 if (NewKernel) {
920 LoopInfo->setPreheader(PrologBBs[MaxIter]);
921 LoopInfo->adjustTripCount(-(MaxIter + 1));
922 }
923}
924
925
926
927bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
931 bool OffsetIsScalable;
933 return false;
934
935
936 if (OffsetIsScalable)
937 return false;
938
939 if (!BaseOp->isReg())
940 return false;
941
943
945
947 if (BaseDef && BaseDef->isPHI()) {
949 BaseDef = MRI.getVRegDef(BaseReg);
950 }
951 if (!BaseDef)
952 return false;
953
954 int D = 0;
956 return false;
957
958 Delta = D;
959 return true;
960}
961
962
963
964
965void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
967 unsigned Num) {
968 if (Num == 0)
969 return;
970
971
973 return;
976
977 if (MMO->isVolatile() || MMO->isAtomic() ||
978 (MMO->isInvariant() && MMO->isDereferenceable()) ||
979 (!MMO->getValue())) {
981 continue;
982 }
983 unsigned Delta;
984 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
985 int64_t AdjOffset = Delta * Num;
988 } else {
991 }
992 }
994}
995
996
997
999 unsigned CurStageNum,
1000 unsigned InstStageNum) {
1002 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1003 return NewMI;
1004}
1005
1006
1007
1008
1009MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1010 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1012 auto It = InstrChanges.find(OldMI);
1013 if (It != InstrChanges.end()) {
1014 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1015 unsigned BasePos, OffsetPos;
1017 return nullptr;
1019 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1020 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1021 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1023 }
1024 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1025 return NewMI;
1026}
1027
1028
1029
1030void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1031 bool LastDef,
1032 unsigned CurStageNum,
1033 unsigned InstrStageNum,
1034 ValueMapTy *VRMap) {
1037 continue;
1039 if (MO.isDef()) {
1040
1042 Register NewReg = MRI.createVirtualRegister(RC);
1044 VRMap[CurStageNum][reg] = NewReg;
1045 if (LastDef)
1047 } else if (MO.isUse()) {
1049
1050 int DefStageNum = Schedule.getStage(Def);
1051 unsigned StageNum = CurStageNum;
1052 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1053
1054 unsigned StageDiff = (InstrStageNum - DefStageNum);
1055
1056 StageNum -= StageDiff;
1057 }
1058 if (VRMap[StageNum].count(reg))
1059 MO.setReg(VRMap[StageNum][reg]);
1060 }
1061 }
1062}
1063
1064
1065
1066
1067MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1070 while (Def->isPHI()) {
1071 if (!Visited.insert(Def).second)
1072 break;
1073 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1074 if (Def->getOperand(i + 1).getMBB() == BB) {
1075 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1076 break;
1077 }
1078 }
1079 return Def;
1080}
1081
1082
1083unsigned ModuloScheduleExpander::getPrevMapVal(
1084 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1086 unsigned PrevVal = 0;
1087 if (StageNum > PhiStage) {
1089 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1090
1091 PrevVal = VRMap[StageNum - 1][LoopVal];
1092 else if (VRMap[StageNum].count(LoopVal))
1093
1094
1095 PrevVal = VRMap[StageNum][LoopVal];
1096 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1097
1098 PrevVal = LoopVal;
1099 else if (StageNum == PhiStage + 1)
1100
1102 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1103
1104 PrevVal =
1105 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1106 LoopStage, VRMap, BB);
1107 }
1108 return PrevVal;
1109}
1110
1111
1112
1113
1114
1115void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1116 unsigned StageNum,
1117 ValueMapTy *VRMap,
1118 InstrMapTy &InstrMap) {
1119 for (auto &PHI : BB->phis()) {
1120 unsigned InitVal = 0;
1121 unsigned LoopVal = 0;
1123 Register PhiDef = PHI.getOperand(0).getReg();
1124
1125 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1126 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1127 unsigned NumPhis = getStagesForPhi(PhiDef);
1128 if (NumPhis > StageNum)
1129 NumPhis = StageNum;
1130 for (unsigned np = 0; np <= NumPhis; ++np) {
1131 unsigned NewVal =
1132 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1133 if (!NewVal)
1134 NewVal = InitVal;
1135 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1136 NewVal);
1137 }
1138 }
1139}
1140
1141
1142
1143
1144void ModuloScheduleExpander::rewriteScheduledInstr(
1145 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1146 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1147 unsigned PrevReg) {
1149 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1150
1151
1156 continue;
1159 continue;
1161 continue;
1162 }
1164 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1166 int StageSched = Schedule.getStage(OrigMI);
1167 int CycleSched = Schedule.getCycle(OrigMI);
1168 unsigned ReplaceReg = 0;
1169
1170 if (StagePhi == StageSched && Phi->isPHI()) {
1171 int CyclePhi = Schedule.getCycle(Phi);
1172 if (PrevReg && InProlog)
1173 ReplaceReg = PrevReg;
1174 else if (PrevReg && !isLoopCarried(*Phi) &&
1175 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1176 ReplaceReg = PrevReg;
1177 else
1178 ReplaceReg = NewReg;
1179 }
1180
1181
1182 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1183 ReplaceReg = NewReg;
1184 if (StagePhi > StageSched && Phi->isPHI())
1185 ReplaceReg = NewReg;
1186 if (!InProlog && ->isPHI() && StagePhi < StageSched)
1187 ReplaceReg = NewReg;
1188 if (ReplaceReg) {
1190 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1191 if (NRC)
1192 UseOp.setReg(ReplaceReg);
1193 else {
1194 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1196 SplitReg)
1197 .addReg(ReplaceReg);
1198 UseOp.setReg(SplitReg);
1199 }
1200 }
1201 }
1202}
1203
1204bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1205 if (.isPHI())
1206 return false;
1207 int DefCycle = Schedule.getCycle(&Phi);
1208 int DefStage = Schedule.getStage(&Phi);
1209
1210 unsigned InitVal = 0;
1211 unsigned LoopVal = 0;
1212 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1214 if ( || Use->isPHI())
1215 return true;
1218 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1219}
1220
1221
1222
1223
1224
1225
1226
1227
1228namespace {
1229
1230
1232 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1233 bool Changed = true;
1234 while (Changed) {
1235 Changed = false;
1238 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1239 if (LIS)
1241 MI.eraseFromParent();
1242 Changed = true;
1243 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1245 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1246 MRI.getRegClass(MI.getOperand(0).getReg()));
1247 assert(ConstrainRegClass &&
1248 "Expected a valid constrained register class!");
1249 (void)ConstrainRegClass;
1250 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1251 MI.getOperand(1).getReg());
1252 if (LIS)
1254 MI.eraseFromParent();
1255 Changed = true;
1256 }
1257 }
1258 }
1259}
1260
1261
1262
1263class KernelRewriter {
1270
1271
1273
1274
1276
1278
1279
1280
1282
1283
1284
1287
1289
1290public:
1293 void rewrite();
1294};
1295}
1296
1299 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1300 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1301 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1303 if (PreheaderBB == BB)
1304 PreheaderBB = *std::next(BB->pred_begin());
1305}
1306
1307void KernelRewriter::rewrite() {
1308
1309
1310
1311
1315 if (MI->isPHI())
1316 continue;
1317 if (MI->getParent())
1318 MI->removeFromParent();
1320 if (!FirstMI)
1321 FirstMI = MI;
1322 }
1323 assert(FirstMI && "Failed to find first MI in schedule");
1324
1325
1326
1328 if (LIS)
1330 (I++)->eraseFromParent();
1331 }
1332
1333
1335 if (MI.isPHI() || MI.isTerminator())
1336 continue;
1339 continue;
1342 }
1343 }
1344 EliminateDeadPhis(BB, MRI, LIS);
1345
1346
1347
1348
1349
1350 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1351 if (MI->isPHI()) {
1352 Register R = MI->getOperand(0).getReg();
1354 continue;
1355 }
1356
1359 if (MI.getParent() != BB) {
1361 break;
1362 }
1363 }
1364 }
1365 }
1366}
1367
1370 if (!Producer)
1371 return Reg;
1372
1373 int ConsumerStage = S.getStage(&MI);
1375
1376
1377 if (Producer->getParent() != BB)
1378
1379 return Reg;
1380 int ProducerStage = S.getStage(Producer);
1381 assert(ConsumerStage != -1 &&
1382 "In-loop consumer should always be scheduled!");
1383 assert(ConsumerStage >= ProducerStage);
1384 unsigned StageDiff = ConsumerStage - ProducerStage;
1385
1386 for (unsigned I = 0; I < StageDiff; ++I)
1387 Reg = phi(Reg);
1388 return Reg;
1389 }
1390
1391
1392
1395 auto LoopProducer = Producer;
1396 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1399 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1400 assert(LoopProducer);
1401 }
1402 int LoopProducerStage = S.getStage(LoopProducer);
1403
1404 std::optional IllegalPhiDefault;
1405
1406 if (LoopProducerStage == -1) {
1407
1408 } else if (LoopProducerStage > ConsumerStage) {
1409
1410
1411
1412
1413#ifndef NDEBUG
1414 int LoopProducerCycle = S.getCycle(LoopProducer);
1415 int ConsumerCycle = S.getCycle(&MI);
1416#endif
1417 assert(LoopProducerCycle <= ConsumerCycle);
1418 assert(LoopProducerStage == ConsumerStage + 1);
1419
1420
1421
1422
1423
1424
1425 IllegalPhiDefault = Defaults.front();
1427 } else {
1428 assert(ConsumerStage >= LoopProducerStage);
1429 int StageDiff = ConsumerStage - LoopProducerStage;
1430 if (StageDiff > 0) {
1431 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1432 << " to " << (Defaults.size() + StageDiff) << "\n");
1433
1434
1435
1436 Defaults.resize(Defaults.size() + StageDiff,
1437 Defaults.empty() ? std::optional()
1438 : Defaults.back());
1439 }
1440 }
1441
1442
1443 auto DefaultI = Defaults.rbegin();
1444 while (DefaultI != Defaults.rend())
1445 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1446
1447 if (IllegalPhiDefault) {
1448
1449
1450
1451
1452
1453 auto RC = MRI.getRegClass(Reg);
1457 .addReg(*IllegalPhiDefault)
1458 .addMBB(PreheaderBB)
1460 .addMBB(BB);
1461
1462
1463 S.setStage(IllegalPhi, LoopProducerStage);
1464 return R;
1465 }
1466
1467 return LoopReg;
1468}
1469
1470Register KernelRewriter::phi(Register LoopReg, std::optional InitReg,
1472
1473 if (InitReg) {
1474 auto I = Phis.find({LoopReg, *InitReg});
1475 if (I != Phis.end())
1476 return I->second;
1477 } else {
1478 for (auto &KV : Phis) {
1479 if (KV.first.first == LoopReg)
1480 return KV.second;
1481 }
1482 }
1483
1484
1485
1486 auto I = UndefPhis.find(LoopReg);
1487 if (I != UndefPhis.end()) {
1489 if (!InitReg)
1490
1491
1492 return R;
1493
1495 MI->getOperand(1).setReg(*InitReg);
1496 Phis.insert({{LoopReg, *InitReg}, R});
1498 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1499 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1500 (void)ConstrainRegClass;
1501 UndefPhis.erase(I);
1502 return R;
1503 }
1504
1505
1506 if (!RC)
1507 RC = MRI.getRegClass(LoopReg);
1509 if (InitReg) {
1511 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1512 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1513 (void)ConstrainRegClass;
1514 }
1515 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1516 .addReg(InitReg ? *InitReg : undef(RC))
1517 .addMBB(PreheaderBB)
1520 if (!InitReg)
1521 UndefPhis[LoopReg] = R;
1522 else
1523 Phis[{LoopReg, *InitReg}] = R;
1524 return R;
1525}
1526
1529 if (R == 0) {
1530
1531
1532
1533 R = MRI.createVirtualRegister(RC);
1534 auto *InsertBB = &PreheaderBB->getParent()->front();
1535 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1536 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1537 }
1538 return R;
1539}
1540
1541namespace {
1542
1543
1544
1545class KernelOperandInfo {
1551
1552public:
1558 while (isRegInLoop(MO)) {
1560 if (MI->isFullCopy()) {
1561 MO = &MI->getOperand(1);
1562 continue;
1563 }
1564 if (->isPHI())
1565 break;
1566
1567 if (IllegalPhis.count(MI)) {
1568 MO = &MI->getOperand(3);
1569 continue;
1570 }
1571
1573 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1574 : &MI->getOperand(3);
1576 }
1578 }
1579
1581 return PhiDefaults.size() == Other.PhiDefaults.size();
1582 }
1583
1585 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1586 << *Source->getParent();
1587 }
1588
1589private:
1592 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1593 }
1594};
1595}
1596
1602 else
1604 for (auto I = BB->begin(), NI = NewBB->begin(); ->isTerminator();
1605 ++I, ++NI) {
1610 }
1611 return NewBB;
1612}
1613
1615 int MinStage) {
1617 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1620 if (Stage == -1 || Stage >= MinStage)
1621 continue;
1622
1626
1627
1630 MI->getParent());
1632 }
1633 for (auto &Sub : Subs)
1634 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1636 }
1637 if (LIS)
1639 MI->eraseFromParent();
1640 }
1641}
1642
1649 if (MI.isPHI()) {
1650
1651
1653
1654
1655 Register PhiR = MI.getOperand(0).getReg();
1664 Remaps[PhiR] = NR;
1665 }
1666 }
1668 continue;
1669 MI.removeFromParent();
1670 DestBB->insert(InsertPt, &MI);
1673 BlockMIs.erase({SourceBB, KernelMI});
1674 }
1677 assert(MI.getNumOperands() == 3);
1679
1680
1681 if (getStage(Def) == Stage) {
1682 Register PhiReg = MI.getOperand(0).getReg();
1683 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1684 nullptr) != -1);
1686 MI.getOperand(0).setReg(PhiReg);
1688 }
1689 }
1690 for (auto *P : PhiToDelete)
1691 P->eraseFromParent();
1693
1694
1697 DestBB->insert(InsertPt, NewMI);
1698 Register OrigR = Phi->getOperand(0).getReg();
1703 Remaps[OrigR] = R;
1707 return R;
1708 };
1711 if (!MO.isReg())
1712 continue;
1715 else {
1716
1717
1719 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1722 }
1723 }
1724 }
1725 }
1726}
1727
1734 for (unsigned I = 0; I < distance; ++I) {
1737 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1739 std::swap(LoopRegIdx, InitRegIdx);
1740 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1742 }
1743 return CanonicalUseReg;
1744}
1745
1751
1752
1753 LS.reset();
1755 LS[I] = true;
1759 }
1760
1761
1762
1763
1764
1765
1766
1768 EliminateDeadPhis(ExitingBB, MRI, LIS, true);
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1789
1790
1791 EliminateDeadPhis(B, MRI, LIS, true);
1794 }
1795 for (size_t I = 0; I < Epilogs.size(); I++) {
1796 LS.reset();
1797 for (size_t J = I; J < Epilogs.size(); J++) {
1798 int Iteration = J;
1800
1801 for (size_t K = Iteration; K > I; K--)
1803 LS[Stage] = true;
1804 }
1807 }
1808
1809
1810
1811
1812 auto PI = Prologs.begin();
1813 auto EI = Epilogs.begin();
1815 for (; PI != Prologs.end(); ++PI, ++EI) {
1817 (*PI)->addSuccessor(*EI);
1819 Register Reg = MI.getOperand(1).getReg();
1821 if (Use && Use->getParent() == Pred) {
1823 if (CanonicalUse->isPHI()) {
1824
1825
1826
1828 }
1830 }
1833 }
1834 }
1835
1836
1841
1842
1844 for (auto I = B->instr_rbegin();
1845 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1848 }
1849 }
1851 if (LIS)
1853 MI->eraseFromParent();
1854 }
1856
1857
1859 EliminateDeadPhis(B, MRI, LIS);
1860 EliminateDeadPhis(ExitingBB, MRI, LIS);
1861}
1862
1866 if (Exit == BB)
1868
1871
1872
1875 Register OldR = MI.getOperand(3).getReg();
1879 if (Use.getParent() != BB)
1882 Use->substituteRegister(OldR, R, 0,
1889 }
1891 Exit->replacePhiUsesWith(BB, NewBB);
1893
1897 (void)CanAnalyzeBr;
1898 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1903 return NewBB;
1904}
1905
1910 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, nullptr);
1912}
1913
1915 if (MI->isPHI()) {
1916
1917
1918 Register PhiR = MI->getOperand(0).getReg();
1919 Register R = MI->getOperand(3).getReg();
1921 if (RMIStage != -1 && [MI->getParent()].test(RMIStage))
1922 R = MI->getOperand(1).getReg();
1925
1926
1927 MI->getOperand(0).setReg(PhiR);
1929 return;
1930 }
1931
1933 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1935
1936 return;
1937
1941
1942
1945 MI->getParent());
1947 }
1948 for (auto &Sub : Subs)
1949 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1951 }
1952 if (LIS)
1954 MI->eraseFromParent();
1955}
1956
1958
1959 bool KernelDisposed = false;
1962 ++PI, ++EI, --TC) {
1968 std::optional StaticallyGreater =
1970 if (!StaticallyGreater) {
1971 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1972
1974 } else if (*StaticallyGreater == false) {
1975 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1976
1977
1978 Prolog->removeSuccessor(Fallthrough);
1980 P.removeOperand(2);
1981 P.removeOperand(1);
1982 }
1984 KernelDisposed = true;
1985 } else {
1986 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1987
1990 P.removeOperand(4);
1991 P.removeOperand(3);
1992 }
1993 }
1994 }
1995
1996 if (!KernelDisposed) {
1999 } else {
2001 }
2002}
2003
2006 KR.rewrite();
2007}
2008
2015
2019}
2020
2024
2025
2026
2027 std::string ScheduleDump;
2031
2032
2033
2034 assert(LIS && "Requires LiveIntervals!");
2039 if (!ExpandedKernel) {
2040
2042 return;
2043 }
2044
2046
2047
2049 KR.rewrite();
2051
2052
2053
2056 if (NI->isPHI())
2057 IllegalPhis.insert(&*NI);
2058 }
2059
2060
2061
2063 auto OI = ExpandedKernel->begin();
2065 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2066 while (OI->isPHI() || OI->isFullCopy())
2067 ++OI;
2068 while (NI->isPHI() || NI->isFullCopy())
2069 ++NI;
2070 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2071
2072 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2073 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2074 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2075 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2076 }
2077
2078 bool Failed = false;
2079 for (auto &OldAndNew : KOIs) {
2080 if (OldAndNew.first == OldAndNew.second)
2081 continue;
2083 errs() << "Modulo kernel validation error: [\n";
2084 errs() << " [golden] ";
2085 OldAndNew.first.print(errs());
2086 errs() << " ";
2087 OldAndNew.second.print(errs());
2088 errs() << "]\n";
2089 }
2090
2092 errs() << "Golden reference kernel:\n";
2094 errs() << "New kernel:\n";
2096 errs() << ScheduleDump;
2098 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2099 }
2100
2101
2102
2105}
2106
2109
2110
2112
2113 return NewMI;
2114}
2115
2116
2117
2118
2121 if (Exit->pred_size() == 1)
2122 return Exit;
2123
2126
2129 MF->insert(Loop->getIterator(), NewExit);
2130
2135 FBB = NewExit;
2136 else if (FBB == Loop)
2137 TBB = NewExit;
2138 else
2142 Loop->replaceSuccessor(Exit, NewExit);
2143 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2145
2146 Exit->replacePhiUsesWith(Loop, NewExit);
2147
2148 return NewExit;
2149}
2150
2151
2152
2153
2155 int RequiredTC,
2156 InstrMapTy &LastStage0Insts,
2160 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2161 LastStage0Insts);
2162
2164
2165
2169 } else {
2171 }
2172}
2173
2174
2175
2176
2177
2178
2179void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2254 assert(LoopInfo && "Must be able to analyze loop!");
2255
2256 calcNumUnroll();
2257
2263
2269
2271
2274
2278
2281
2282 Prolog->addSuccessor(NewKernel);
2283
2286
2287 Epilog->addSuccessor(NewPreheader);
2288 Epilog->addSuccessor(NewExit);
2289
2290 InstrMapTy LastStage0Insts;
2291 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2292 LastStage0Insts, *Prolog, *NewPreheader);
2293
2294
2295
2297 generateProlog(PrologVRMap);
2298 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2299 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2300}
2301
2302
2303void ModuloScheduleExpanderMVE::updateInstrUse(
2307
2308
2309
2310
2311
2312
2314 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2315 continue;
2316 int DiffStage = 0;
2317 Register OrigReg = UseMO.getReg();
2319 if (!DefInst || DefInst->getParent() != OrigKernel)
2320 continue;
2321 unsigned InitReg = 0;
2322 unsigned DefReg = OrigReg;
2323 if (DefInst->isPHI()) {
2324 ++DiffStage;
2325 unsigned LoopReg;
2326 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2327
2328 DefReg = LoopReg;
2329 DefInst = MRI.getVRegDef(LoopReg);
2330 }
2331 unsigned DefStageNum = Schedule.getStage(DefInst);
2332 DiffStage += StageNum - DefStageNum;
2334 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2335
2336 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2337 else if (!PrevVRMap)
2338
2339
2340 NewReg = InitReg;
2341 else
2342
2343
2344
2345
2346 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2347
2349 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2350 if (NRC)
2351 UseMO.setReg(NewReg);
2352 else {
2353 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2354 BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2355 SplitReg)
2357 UseMO.setReg(SplitReg);
2358 }
2359 }
2360}
2361
2362
2363
2366 unsigned InitVal, LoopVal;
2368 if (LoopVal == Reg)
2369 return Φ
2370 }
2371 return nullptr;
2372}
2373
2374
2375void ModuloScheduleExpanderMVE::generatePhi(
2380 int StageNum = Schedule.getStage(OrigMI);
2381 bool UsePrologReg;
2382 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2383 UsePrologReg = true;
2384 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2385 UsePrologReg = false;
2386 else
2387 return;
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2421 if (!DefMO.isReg() || DefMO.isDead())
2422 continue;
2423 Register OrigReg = DefMO.getReg();
2424 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2425 if (NewReg == KernelVRMap[UnrollNum].end())
2426 continue;
2428 if (UsePrologReg) {
2429 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2430 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2431 } else {
2433 if (!Phi)
2434 continue;
2435 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2436 }
2437
2439 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2441 TII->get(TargetOpcode::PHI), PhiReg)
2442 .addReg(NewReg->second)
2444 .addReg(CorrespondReg)
2446 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2447 }
2448}
2449
2452 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2453 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2454 Phi.getOperand(Idx).setReg(NewReg);
2455 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2456 return;
2457 }
2458 }
2459}
2460
2461
2462void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2467 E = MRI.use_end();
2470 if (O.getParent()->getParent() != OrigKernel &&
2471 O.getParent()->getParent() != Prolog &&
2472 O.getParent()->getParent() != NewKernel &&
2473 O.getParent()->getParent() != Epilog)
2475 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2477 }
2478
2479
2480
2481 if (!UsesAfterLoop.empty()) {
2482 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2484 TII->get(TargetOpcode::PHI), PhiReg)
2489
2492
2495 }
2496
2497
2498
2499 if (!LoopPhis.empty()) {
2501 unsigned InitReg, LoopReg;
2502 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2503 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2505 TII->get(TargetOpcode::PHI), NewInit)
2510 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2511 }
2512 }
2513}
2514
2515void ModuloScheduleExpanderMVE::generateProlog(
2517 PrologVRMap.clear();
2520 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2521 ++PrologNum) {
2523 if (MI->isPHI())
2524 continue;
2525 int StageNum = Schedule.getStage(MI);
2526 if (StageNum > PrologNum)
2527 continue;
2529 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2530 NewMIMap[NewMI] = {PrologNum, StageNum};
2531 Prolog->push_back(NewMI);
2532 }
2533 }
2534
2535 for (auto I : NewMIMap) {
2537 int PrologNum = I.second.first;
2538 int StageNum = I.second.second;
2539 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2540 }
2541
2543 dbgs() << "prolog:\n";
2545 });
2546}
2547
2548void ModuloScheduleExpanderMVE::generateKernel(
2551 KernelVRMap.clear();
2552 KernelVRMap.resize(NumUnroll);
2554 PhiVRMap.resize(NumUnroll);
2557 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2559 if (MI->isPHI())
2560 continue;
2561 int StageNum = Schedule.getStage(MI);
2563 if (UnrollNum == NumUnroll - 1)
2564 LastStage0Insts[MI] = NewMI;
2565 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2566 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2567 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2568 NewMIMap[NewMI] = {UnrollNum, StageNum};
2570 }
2571 }
2572
2573 for (auto I : NewMIMap) {
2575 int UnrollNum = I.second.first;
2576 int StageNum = I.second.second;
2577 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2578 }
2579
2580
2581 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2582 *Epilog);
2583
2585 dbgs() << "kernel:\n";
2586 NewKernel->dump();
2587 });
2588}
2589
2590void ModuloScheduleExpanderMVE::generateEpilog(
2593 EpilogVRMap.clear();
2596 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2597 ++EpilogNum) {
2599 if (MI->isPHI())
2600 continue;
2601 int StageNum = Schedule.getStage(MI);
2602 if (StageNum <= EpilogNum)
2603 continue;
2605 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2606 NewMIMap[NewMI] = {EpilogNum, StageNum};
2607 Epilog->push_back(NewMI);
2608 }
2609 }
2610
2611 for (auto I : NewMIMap) {
2613 int EpilogNum = I.second.first;
2614 int StageNum = I.second.second;
2615 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2616 }
2617
2618
2619
2620
2621
2622 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2623
2625 dbgs() << "epilog:\n";
2627 });
2628}
2629
2630
2631void ModuloScheduleExpanderMVE::calcNumUnroll() {
2633 NumUnroll = 1;
2636
2638 if (MI->isPHI())
2639 continue;
2640 int StageNum = Schedule.getStage(MI);
2643 continue;
2646 continue;
2647
2648 int NumUnrollLocal = 1;
2650 ++NumUnrollLocal;
2651
2652
2654 }
2655 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2656 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2657 --NumUnrollLocal;
2658 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2659 }
2660 }
2661 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2662}
2663
2664
2665
2666
2667void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2668 ValueMapTy &VRMap,
2669 bool LastDef) {
2672 continue;
2675 Register NewReg = MRI.createVirtualRegister(RC);
2677 VRMap[Reg] = NewReg;
2678 if (LastDef)
2679 mergeRegUsesAfterPipeline(Reg, NewReg);
2680 }
2681}
2682
2687
2689
2690 generatePipelinedLoop();
2691}
2692
2693
2695 if (!L.getExitBlock()) {
2696 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2697 return false;
2698 }
2699
2702
2703
2704
2707
2708
2712 if (Ref.getParent() != BB || Ref.isPHI()) {
2713 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2714 "referenced outside of the loop or by phi.\n");
2715 return false;
2716 }
2717
2718
2719
2720
2721 unsigned InitVal, LoopVal;
2723 if ((LoopVal).isVirtual() ||
2724 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2726 dbgs() << "Can not apply MVE expander: A phi source value coming "
2727 "from the loop is not defined in the loop.\n");
2728 return false;
2729 }
2730 if (UsedByPhi.count(LoopVal)) {
2731 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2732 "loop is referenced by two or more phis.\n");
2733 return false;
2734 }
2735 UsedByPhi.insert(LoopVal);
2736 }
2737
2738 return true;
2739}
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754namespace {
2756public:
2757 static char ID;
2758
2761 }
2762
2763 bool runOnMachineFunction(MachineFunction &MF) override;
2765
2766 void getAnalysisUsage(AnalysisUsage &AU) const override {
2770 }
2771};
2772}
2773
2774char ModuloScheduleTest::ID = 0;
2775
2777 "Modulo Schedule test pass", false, false)
2782
2783bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2784 MachineLoopInfo &MLI = getAnalysis().getLI();
2785 for (auto *L : MLI) {
2786 if (L->getTopBlock() != L->getBottomBlock())
2787 continue;
2788 runOnLoop(MF, *L);
2789 return false;
2790 }
2791 return false;
2792}
2793
2795 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2796 std::pair<StringRef, StringRef> StageTokenAndValue =
2797 getToken(StageAndCycle.first, "-");
2798 std::pair<StringRef, StringRef> CycleTokenAndValue =
2799 getToken(StageAndCycle.second, "-");
2800 if (StageTokenAndValue.first != "Stage" ||
2801 CycleTokenAndValue.first != "_Cycle") {
2803 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2804 return;
2805 }
2806
2807 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2808 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2809
2810 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2811}
2812
2814 LiveIntervals &LIS = getAnalysis().getLIS();
2816 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2817
2819 std::vector<MachineInstr *> Instrs;
2821 if (MI.isTerminator())
2822 continue;
2823 Instrs.push_back(&MI);
2825 dbgs() << "Parsing post-instr symbol for " << MI;
2827 }
2828 }
2829
2831 std::move(Stage));
2834 MSE.expand();
2835 MSE.cleanup();
2836}
2837
2838
2839
2840
2841
2848 MI->setPostInstrSymbol(MF, Sym);
2849 }
2850}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::optional< std::vector< StOtherPiece > > Other
DenseMap< Block *, BlockRelaxAux > Blocks
global merge Global merge function pass
const HexagonInstrInfo * TII
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)
Create a dedicated exit for Loop.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
succ_iterator succ_begin()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
pred_iterator pred_begin()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
ModuloSchedule & Schedule
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
const TargetInstrInfo * TII
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
MachineRegisterInfo & MRI
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
reverse_iterator rbegin()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt copy(R &&Range, OutputIt Out)
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...