LLVM: lib/CodeGen/ModuloSchedule.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28
32}
33
34
35
36
37
38
39
41 unsigned &InitVal, unsigned &LoopVal) {
42 assert(Phi.isPHI() && "Expecting a Phi.");
43
44 InitVal = 0;
45 LoopVal = 0;
46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() != Loop)
48 InitVal = Phi.getOperand(i).getReg();
49 else
50 LoopVal = Phi.getOperand(i).getReg();
51
52 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
53}
54
55
57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
60 return 0;
61}
62
63
65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
68 return 0;
69}
70
74 if (Preheader == BB)
75 Preheader = *std::next(BB->pred_begin());
76
77
78
83 unsigned MaxDiff = 0;
84 bool PhiIsSwapped = false;
88 unsigned Diff = 0;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
91 if (MI->isPHI()) {
92 if (isLoopCarried(*MI))
93 ++Diff;
94 else
95 PhiIsSwapped = true;
96 }
97 MaxDiff = std::max(Diff, MaxDiff);
98 }
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
100 }
101 }
102
103 generatePipelinedLoop();
104}
105
106void ModuloScheduleExpander::generatePipelinedLoop() {
109
110
112
113 unsigned MaxStageCount = Schedule.getNumStages() - 1;
114
115
116
117
118
119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120
121
122
123
124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125
126 InstrMapTy InstrMap;
127
129
130
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
134
135
136
138 if (CI->isPHI())
139 continue;
140 unsigned StageNum = Schedule.getStage(CI);
141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
144 InstrMap[NewMI] = CI;
145 }
146
147
148
151 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
153 InstrMap[NewMI] = &MI;
154 }
155
156 NewKernel = KernelBB;
159
160 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
161 InstrMap, MaxStageCount, MaxStageCount, false);
162 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163 InstrMap, MaxStageCount, MaxStageCount, false);
164
166
168
169 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
170 PrologBBs);
171
172
173
174 splitLifetimes(KernelBB, EpilogBBs);
175
176
177 removeDeadInstructions(KernelBB, EpilogBBs);
178
179
180 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
181
182 delete[] VRMap;
183 delete[] VRMapPhi;
184}
185
187
188 for (auto &I : *BB)
190 BB->clear();
191 BB->eraseFromParent();
192}
193
194
195void ModuloScheduleExpander::generateProlog(unsigned LastStage,
197 ValueMapTy *VRMap,
198 MBBVectorTy &PrologBBs) {
200 InstrMapTy InstrMap;
201
202
203
204
205 for (unsigned i = 0; i < LastStage; ++i) {
206
207
213 PredBB = NewBB;
215
216
217
218 for (int StageNum = i; StageNum >= 0; --StageNum) {
221 BBI != BBE; ++BBI) {
222 if (Schedule.getStage(&*BBI) == StageNum) {
223 if (BBI->isPHI())
224 continue;
226 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
229 InstrMap[NewMI] = &*BBI;
230 }
231 }
232 }
233 rewritePhiValues(NewBB, i, VRMap, InstrMap);
235 dbgs() << "prolog:\n";
236 NewBB->dump();
237 });
238 }
239
241
242
243
244 unsigned numBranches = TII->removeBranch(*Preheader);
245 if (numBranches) {
248 }
249}
250
251
252
253
254void ModuloScheduleExpander::generateEpilog(
256 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257 MBBVectorTy &PrologBBs) {
258
259
263 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
264 if (checkBranch)
265 return;
266
268 if (*LoopExitI == KernelBB)
269 ++LoopExitI;
270 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
272
275 InstrMapTy InstrMap;
276
277
278
279
280 int EpilogStage = LastStage + 1;
281 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
285
289
290 if (EpilogStart == LoopExitBB)
291 EpilogStart = NewBB;
292
293
294
295 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
296 for (auto &BBI : *BB) {
297 if (BBI.isPHI())
298 continue;
300 if ((unsigned)Schedule.getStage(In) == StageNum) {
301
302
303 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
304 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
306 InstrMap[NewMI] = In;
307 }
308 }
309 }
310 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
311 InstrMap, LastStage, EpilogStage, i == 1);
312 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313 InstrMap, LastStage, EpilogStage, i == 1);
314 PredBB = NewBB;
315
317 dbgs() << "epilog:\n";
318 NewBB->dump();
319 });
320 }
321
322
324
325
326
328 assert((OrigBB == TBB || OrigBB == FBB) &&
329 "Unable to determine looping branch direction");
330 if (OrigBB != TBB)
332 else
334
335 if (EpilogBBs.size() > 0) {
339 }
340}
341
342
343
350 if (O.getParent()->getParent() != MBB)
351 O.setReg(ToReg);
354}
355
356
357
361 if (MO.getParent()->getParent() != BB)
362 return true;
363 return false;
364}
365
366
367
368
369void ModuloScheduleExpander::generateExistingPhis(
371 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
372 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
373
374
375
376 unsigned PrologStage = 0;
377 unsigned PrevStage = 0;
378 bool InKernel = (LastStageNum == CurStageNum);
379 if (InKernel) {
380 PrologStage = LastStageNum - 1;
381 PrevStage = CurStageNum;
382 } else {
383 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
384 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
385 }
386
388 BBE = BB->getFirstNonPHI();
389 BBI != BBE; ++BBI) {
390 Register Def = BBI->getOperand(0).getReg();
391
392 unsigned InitVal = 0;
393 unsigned LoopVal = 0;
394 getPhiRegs(*BBI, BB, InitVal, LoopVal);
395
396 unsigned PhiOp1 = 0;
397
398
399 unsigned PhiOp2 = LoopVal;
400 if (auto It = VRMap[LastStageNum].find(LoopVal);
401 It != VRMap[LastStageNum].end())
402 PhiOp2 = It->second;
403
404 int StageScheduled = Schedule.getStage(&*BBI);
406 unsigned NumStages = getStagesForReg(Def, CurStageNum);
407 if (NumStages == 0) {
408
409
410 unsigned NewReg = VRMap[PrevStage][LoopVal];
411 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
412 InitVal, NewReg);
413 if (VRMap[CurStageNum].count(LoopVal))
414 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
415 }
416
417
418
419
420 unsigned MaxPhis = PrologStage + 2;
421 if (!InKernel && (int)PrologStage <= LoopValStage)
422 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
423 unsigned NumPhis = std::min(NumStages, MaxPhis);
424
425 unsigned NewReg = 0;
426 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
427
428
429
430
431
432 int StageDiff = 0;
433 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
434 NumPhis == 1)
435 StageDiff = 1;
436
437
438 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
439 StageDiff = StageScheduled - LoopValStage;
440 for (unsigned np = 0; np < NumPhis; ++np) {
441
442
443
444 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
445 PhiOp1 = InitVal;
446
447 else if (PrologStage >= AccessStage + StageDiff + np &&
448 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
449 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
450
451
452 else if (PrologStage >= AccessStage + StageDiff + np) {
453
454
455 PhiOp1 = LoopVal;
457 int Indirects = 1;
458 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
459 int PhiStage = Schedule.getStage(InstOp1);
460 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
462 else
465 int PhiOpStage = Schedule.getStage(InstOp1);
466 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
467 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
468 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
469 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
470 break;
471 }
472 ++Indirects;
473 }
474 } else
475 PhiOp1 = InitVal;
476
477
479 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
481
483 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
484
485
486
487 if (!InKernel) {
488 int StageDiffAdj = 0;
489 if (LoopValStage != -1 && StageScheduled > LoopValStage)
490 StageDiffAdj = StageScheduled - LoopValStage;
491
492
493 if (np == 0 && PrevStage == LastStageNum &&
494 (StageScheduled != 0 || LoopValStage != 0) &&
495 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
496 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
497
498
499 else if (np > 0 && PrevStage == LastStageNum &&
500 VRMap[PrevStage - np + 1].count(Def))
501 PhiOp2 = VRMap[PrevStage - np + 1][Def];
502
503 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
504 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
505 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
506
507
508 else if (VRMap[PrevStage - np].count(Def) &&
509 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
510 (LoopValStage == StageScheduled)))
511 PhiOp2 = VRMap[PrevStage - np][Def];
512 }
513
514
515
516
517
518 if (LoopDefIsPhi) {
519 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
520 int LVNumStages = getStagesForPhi(LoopVal);
521 int StageDiff = (StageScheduled - LoopValStage);
522 LVNumStages -= StageDiff;
523
524 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
525 NewReg = PhiOp2;
526 unsigned ReuseStage = CurStageNum;
527 if (isLoopCarried(*PhiInst))
528 ReuseStage -= LVNumStages;
529
530
531 if (VRMap[ReuseStage - np].count(LoopVal)) {
532 NewReg = VRMap[ReuseStage - np][LoopVal];
533
534 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
535 Def, NewReg);
536
537 VRMap[CurStageNum - np][Def] = NewReg;
538 PhiOp2 = NewReg;
539 if (VRMap[LastStageNum - np - 1].count(LoopVal))
540 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
541
542 if (IsLast && np == NumPhis - 1)
544 continue;
545 }
546 }
547 }
548 if (InKernel && StageDiff > 0 &&
549 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
550 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
551 }
552
555
558 TII->get(TargetOpcode::PHI), NewReg);
561 if (np == 0)
562 InstrMap[NewPhi] = &*BBI;
563
564
565
566
567 unsigned PrevReg = 0;
568 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
569 PrevReg = VRMap[PrevStage - np][LoopVal];
570 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
571 NewReg, PrevReg);
572
573 if (VRMap[CurStageNum - np].count(Def)) {
574 unsigned R = VRMap[CurStageNum - np][Def];
575 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
576 NewReg);
577 }
578
579
580
581
582 if (IsLast && np == NumPhis - 1)
584
585
586 if (InKernel)
587 PhiOp2 = NewReg;
588
589
590 VRMap[CurStageNum - np][Def] = NewReg;
591 }
592
593 while (NumPhis++ < NumStages) {
594 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
595 NewReg, 0);
596 }
597
598
599
600 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
602 }
603}
604
605
606
607
608void ModuloScheduleExpander::generatePhis(
610 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
611 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
612 bool IsLast) {
613
614
615 unsigned PrologStage = 0;
616 unsigned PrevStage = 0;
617 unsigned StageDiff = CurStageNum - LastStageNum;
618 bool InKernel = (StageDiff == 0);
619 if (InKernel) {
620 PrologStage = LastStageNum - 1;
621 PrevStage = CurStageNum;
622 } else {
623 PrologStage = LastStageNum - StageDiff;
624 PrevStage = LastStageNum + StageDiff - 1;
625 }
626
628 BBE = BB->instr_end();
629 BBI != BBE; ++BBI) {
630 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
633 continue;
634
635 int StageScheduled = Schedule.getStage(&*BBI);
636 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
638 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
639
640
641
642 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
644 NumPhis = 1;
645 if (!InKernel && (unsigned)StageScheduled > PrologStage)
646 continue;
647
648 unsigned PhiOp2;
649 if (InKernel) {
650 PhiOp2 = VRMap[PrevStage][Def];
652 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
654 }
655
656
657 if (NumPhis > PrologStage + 1 - StageScheduled)
658 NumPhis = PrologStage + 1 - StageScheduled;
659 for (unsigned np = 0; np < NumPhis; ++np) {
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682 unsigned PhiOp1 = VRMap[PrologStage][Def];
683 if (np <= PrologStage)
684 PhiOp1 = VRMap[PrologStage - np][Def];
685 if (!InKernel) {
686 if (PrevStage == LastStageNum && np == 0)
687 PhiOp2 = VRMap[LastStageNum][Def];
688 else
689 PhiOp2 = VRMapPhi[PrevStage - np][Def];
690 }
691
694
697 TII->get(TargetOpcode::PHI), NewReg);
700 if (np == 0)
701 InstrMap[NewPhi] = &*BBI;
702
703
704
705 if (InKernel) {
706 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
707 NewReg);
708 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
709 NewReg);
710
711 PhiOp2 = NewReg;
712 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
713 } else {
714 VRMapPhi[CurStageNum - np][Def] = NewReg;
715 if (np == NumPhis - 1)
716 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
717 NewReg);
718 }
719 if (IsLast && np == NumPhis - 1)
721 }
722 }
723 }
724}
725
726
727
728
729
730void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
731 MBBVectorTy &EpilogBBs) {
732
733
737 MI != ME;) {
738
739 if (MI->isInlineAsm()) {
740 ++MI;
741 continue;
742 }
743 bool SawStore = false;
744
745
746 if (->isSafeToMove(SawStore) &&
->isPHI()) {
747 ++MI;
748 continue;
749 }
750 bool used = true;
753
756 if (used)
757 break;
758 continue;
759 }
760 unsigned realUses = 0;
762
763
764 if (U.getParent()->getParent() != BB) {
765 realUses++;
766 used = true;
767 break;
768 }
769 }
770 if (realUses > 0)
771 break;
772 used = false;
773 }
774 if (!used) {
776 MI++->eraseFromParent();
777 continue;
778 }
779 ++MI;
780 }
781
782
784 Register reg = MI.getOperand(0).getReg();
787 MI.eraseFromParent();
788 }
789 }
790}
791
792
793
794
795
796
797
798
799
800
801
802void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
803 MBBVectorTy &EpilogBBs) {
805 for (auto &PHI : KernelBB->phis()) {
807
808
812 if (I->isPHI() && I->getParent() == KernelBB) {
813
815 if (!LCDef)
816 continue;
818 if ( || MI->getParent() != KernelBB || MI->isPHI())
819 continue;
820
821
822 unsigned SplitReg = 0;
825 if (BBJ.readsRegister(Def, nullptr)) {
826
827 if (SplitReg == 0) {
829 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
830 TII->get(TargetOpcode::COPY), SplitReg)
832 }
833 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
834 }
835 if (!SplitReg)
836 continue;
837
838 for (auto &Epilog : EpilogBBs)
840 if (I.readsRegister(Def, nullptr))
841 I.substituteRegister(Def, SplitReg, 0, *TRI);
842 break;
843 }
844 }
845 }
846}
847
848
851 if (.isPHI())
852 break;
853 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
854 if (MI.getOperand(i + 1).getMBB() == Incoming) {
855 MI.removeOperand(i + 1);
856 MI.removeOperand(i);
857 break;
858 }
859 }
860}
861
862
863
864
865void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
866 MBBVectorTy &PrologBBs,
868 MBBVectorTy &EpilogBBs,
869 ValueMapTy *VRMap) {
870 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
873
874
875
877 unsigned MaxIter = PrologBBs.size() - 1;
878 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
879
880
883
885 std::optional StaticallyGreater =
887 unsigned numAdded = 0;
888 if (!StaticallyGreater) {
891 } else if (*StaticallyGreater == false) {
893 Prolog->removeSuccessor(LastPro);
897
898 if (LastPro != LastEpi) {
899 LastEpi->clear();
901 }
902 if (LastPro == KernelBB) {
904 NewKernel = nullptr;
905 }
906 LastPro->clear();
908 } else {
911 }
915 E = Prolog->instr_rend();
916 I != E && numAdded > 0; ++I, --numAdded)
917 updateInstruction(&*I, false, j, 0, VRMap);
918 }
919
920 if (NewKernel) {
921 LoopInfo->setPreheader(PrologBBs[MaxIter]);
922 LoopInfo->adjustTripCount(-(MaxIter + 1));
923 }
924}
925
926
927
928bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
932 bool OffsetIsScalable;
934 return false;
935
936
937 if (OffsetIsScalable)
938 return false;
939
940 if (!BaseOp->isReg())
941 return false;
942
944
946
948 if (BaseDef && BaseDef->isPHI()) {
950 BaseDef = MRI.getVRegDef(BaseReg);
951 }
952 if (!BaseDef)
953 return false;
954
955 int D = 0;
957 return false;
958
959 Delta = D;
960 return true;
961}
962
963
964
965
966void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
968 unsigned Num) {
969 if (Num == 0)
970 return;
971
972
974 return;
977
978 if (MMO->isVolatile() || MMO->isAtomic() ||
979 (MMO->isInvariant() && MMO->isDereferenceable()) ||
980 (!MMO->getValue())) {
982 continue;
983 }
984 unsigned Delta;
985 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
986 int64_t AdjOffset = Delta * Num;
989 } else {
992 }
993 }
995}
996
997
998
1000 unsigned CurStageNum,
1001 unsigned InstStageNum) {
1003 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1004 return NewMI;
1005}
1006
1007
1008
1009
1010MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1011 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1013 auto It = InstrChanges.find(OldMI);
1014 if (It != InstrChanges.end()) {
1015 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1016 unsigned BasePos, OffsetPos;
1018 return nullptr;
1020 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1021 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1022 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1024 }
1025 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1026 return NewMI;
1027}
1028
1029
1030
1031void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1032 bool LastDef,
1033 unsigned CurStageNum,
1034 unsigned InstrStageNum,
1035 ValueMapTy *VRMap) {
1038 continue;
1040 if (MO.isDef()) {
1041
1043 Register NewReg = MRI.createVirtualRegister(RC);
1045 VRMap[CurStageNum][reg] = NewReg;
1046 if (LastDef)
1048 } else if (MO.isUse()) {
1050
1051 int DefStageNum = Schedule.getStage(Def);
1052 unsigned StageNum = CurStageNum;
1053 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1054
1055 unsigned StageDiff = (InstrStageNum - DefStageNum);
1056
1057 StageNum -= StageDiff;
1058 }
1059 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())
1060 MO.setReg(It->second);
1061 }
1062 }
1063}
1064
1065
1066
1067
1068MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1071 while (Def->isPHI()) {
1072 if (!Visited.insert(Def).second)
1073 break;
1074 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1075 if (Def->getOperand(i + 1).getMBB() == BB) {
1076 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1077 break;
1078 }
1079 }
1080 return Def;
1081}
1082
1083
1084unsigned ModuloScheduleExpander::getPrevMapVal(
1085 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1087 unsigned PrevVal = 0;
1088 if (StageNum > PhiStage) {
1090 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1091
1092 PrevVal = VRMap[StageNum - 1][LoopVal];
1093 else if (VRMap[StageNum].count(LoopVal))
1094
1095
1096 PrevVal = VRMap[StageNum][LoopVal];
1097 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1098
1099 PrevVal = LoopVal;
1100 else if (StageNum == PhiStage + 1)
1101
1103 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1104
1105 PrevVal =
1106 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1107 LoopStage, VRMap, BB);
1108 }
1109 return PrevVal;
1110}
1111
1112
1113
1114
1115
1116void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1117 unsigned StageNum,
1118 ValueMapTy *VRMap,
1119 InstrMapTy &InstrMap) {
1120 for (auto &PHI : BB->phis()) {
1121 unsigned InitVal = 0;
1122 unsigned LoopVal = 0;
1124 Register PhiDef = PHI.getOperand(0).getReg();
1125
1126 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1127 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1128 unsigned NumPhis = getStagesForPhi(PhiDef);
1129 if (NumPhis > StageNum)
1130 NumPhis = StageNum;
1131 for (unsigned np = 0; np <= NumPhis; ++np) {
1132 unsigned NewVal =
1133 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1134 if (!NewVal)
1135 NewVal = InitVal;
1136 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1137 NewVal);
1138 }
1139 }
1140}
1141
1142
1143
1144
1145void ModuloScheduleExpander::rewriteScheduledInstr(
1146 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1147 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1148 unsigned PrevReg) {
1150 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1151
1152
1157 continue;
1160 continue;
1162 continue;
1163 }
1165 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1167 int StageSched = Schedule.getStage(OrigMI);
1168 int CycleSched = Schedule.getCycle(OrigMI);
1169 unsigned ReplaceReg = 0;
1170
1171 if (StagePhi == StageSched && Phi->isPHI()) {
1172 int CyclePhi = Schedule.getCycle(Phi);
1173 if (PrevReg && InProlog)
1174 ReplaceReg = PrevReg;
1175 else if (PrevReg && !isLoopCarried(*Phi) &&
1176 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1177 ReplaceReg = PrevReg;
1178 else
1179 ReplaceReg = NewReg;
1180 }
1181
1182
1183 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1184 ReplaceReg = NewReg;
1185 if (StagePhi > StageSched && Phi->isPHI())
1186 ReplaceReg = NewReg;
1187 if (!InProlog && ->isPHI() && StagePhi < StageSched)
1188 ReplaceReg = NewReg;
1189 if (ReplaceReg) {
1191 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1192 if (NRC)
1193 UseOp.setReg(ReplaceReg);
1194 else {
1195 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1197 SplitReg)
1198 .addReg(ReplaceReg);
1199 UseOp.setReg(SplitReg);
1200 }
1201 }
1202 }
1203}
1204
1205bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1206 if (.isPHI())
1207 return false;
1208 int DefCycle = Schedule.getCycle(&Phi);
1209 int DefStage = Schedule.getStage(&Phi);
1210
1211 unsigned InitVal = 0;
1212 unsigned LoopVal = 0;
1213 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1215 if ( || Use->isPHI())
1216 return true;
1219 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1220}
1221
1222
1223
1224
1225
1226
1227
1228
1229namespace {
1230
1231
1233 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1234 bool Changed = true;
1235 while (Changed) {
1236 Changed = false;
1239 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1240 if (LIS)
1242 MI.eraseFromParent();
1243 Changed = true;
1244 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1246 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1247 MRI.getRegClass(MI.getOperand(0).getReg()));
1248 assert(ConstrainRegClass &&
1249 "Expected a valid constrained register class!");
1250 (void)ConstrainRegClass;
1251 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1252 MI.getOperand(1).getReg());
1253 if (LIS)
1255 MI.eraseFromParent();
1256 Changed = true;
1257 }
1258 }
1259 }
1260}
1261
1262
1263
1264class KernelRewriter {
1271
1272
1274
1275
1277
1279
1280
1281
1283
1284
1285
1288
1290
1291public:
1294 void rewrite();
1295};
1296}
1297
1300 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1301 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1302 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1304 if (PreheaderBB == BB)
1305 PreheaderBB = *std::next(BB->pred_begin());
1306}
1307
1308void KernelRewriter::rewrite() {
1309
1310
1311
1312
1316 if (MI->isPHI())
1317 continue;
1318 if (MI->getParent())
1319 MI->removeFromParent();
1321 if (!FirstMI)
1322 FirstMI = MI;
1323 }
1324 assert(FirstMI && "Failed to find first MI in schedule");
1325
1326
1327
1329 if (LIS)
1331 (I++)->eraseFromParent();
1332 }
1333
1334
1336 if (MI.isPHI() || MI.isTerminator())
1337 continue;
1340 continue;
1343 }
1344 }
1345 EliminateDeadPhis(BB, MRI, LIS);
1346
1347
1348
1349
1350
1351 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1352 if (MI->isPHI()) {
1353 Register R = MI->getOperand(0).getReg();
1355 continue;
1356 }
1357
1360 if (MI.getParent() != BB) {
1362 break;
1363 }
1364 }
1365 }
1366 }
1367}
1368
1371 if (!Producer)
1372 return Reg;
1373
1374 int ConsumerStage = S.getStage(&MI);
1376
1377
1378 if (Producer->getParent() != BB)
1379
1380 return Reg;
1381 int ProducerStage = S.getStage(Producer);
1382 assert(ConsumerStage != -1 &&
1383 "In-loop consumer should always be scheduled!");
1384 assert(ConsumerStage >= ProducerStage);
1385 unsigned StageDiff = ConsumerStage - ProducerStage;
1386
1387 for (unsigned I = 0; I < StageDiff; ++I)
1388 Reg = phi(Reg);
1389 return Reg;
1390 }
1391
1392
1393
1396 auto LoopProducer = Producer;
1397 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1400 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1401 assert(LoopProducer);
1402 }
1403 int LoopProducerStage = S.getStage(LoopProducer);
1404
1405 std::optional IllegalPhiDefault;
1406
1407 if (LoopProducerStage == -1) {
1408
1409 } else if (LoopProducerStage > ConsumerStage) {
1410
1411
1412
1413
1414#ifndef NDEBUG
1415 int LoopProducerCycle = S.getCycle(LoopProducer);
1416 int ConsumerCycle = S.getCycle(&MI);
1417#endif
1418 assert(LoopProducerCycle <= ConsumerCycle);
1419 assert(LoopProducerStage == ConsumerStage + 1);
1420
1421
1422
1423
1424
1425
1426 IllegalPhiDefault = Defaults.front();
1428 } else {
1429 assert(ConsumerStage >= LoopProducerStage);
1430 int StageDiff = ConsumerStage - LoopProducerStage;
1431 if (StageDiff > 0) {
1432 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1433 << " to " << (Defaults.size() + StageDiff) << "\n");
1434
1435
1436
1437 Defaults.resize(Defaults.size() + StageDiff,
1438 Defaults.empty() ? std::optional()
1439 : Defaults.back());
1440 }
1441 }
1442
1443
1444 auto DefaultI = Defaults.rbegin();
1445 while (DefaultI != Defaults.rend())
1446 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1447
1448 if (IllegalPhiDefault) {
1449
1450
1451
1452
1453
1454 auto RC = MRI.getRegClass(Reg);
1458 .addReg(*IllegalPhiDefault)
1459 .addMBB(PreheaderBB)
1461 .addMBB(BB);
1462
1463
1464 S.setStage(IllegalPhi, LoopProducerStage);
1465 return R;
1466 }
1467
1468 return LoopReg;
1469}
1470
1471Register KernelRewriter::phi(Register LoopReg, std::optional InitReg,
1473
1474 if (InitReg) {
1475 auto I = Phis.find({LoopReg, *InitReg});
1476 if (I != Phis.end())
1477 return I->second;
1478 } else {
1479 for (auto &KV : Phis) {
1480 if (KV.first.first == LoopReg)
1481 return KV.second;
1482 }
1483 }
1484
1485
1486
1487 auto I = UndefPhis.find(LoopReg);
1488 if (I != UndefPhis.end()) {
1490 if (!InitReg)
1491
1492
1493 return R;
1494
1496 MI->getOperand(1).setReg(*InitReg);
1497 Phis.insert({{LoopReg, *InitReg}, R});
1499 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1500 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1501 (void)ConstrainRegClass;
1502 UndefPhis.erase(I);
1503 return R;
1504 }
1505
1506
1507 if (!RC)
1508 RC = MRI.getRegClass(LoopReg);
1510 if (InitReg) {
1512 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1513 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1514 (void)ConstrainRegClass;
1515 }
1516 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1517 .addReg(InitReg ? *InitReg : undef(RC))
1518 .addMBB(PreheaderBB)
1521 if (!InitReg)
1522 UndefPhis[LoopReg] = R;
1523 else
1524 Phis[{LoopReg, *InitReg}] = R;
1525 return R;
1526}
1527
1530 if (R == 0) {
1531
1532
1533
1534 R = MRI.createVirtualRegister(RC);
1535 auto *InsertBB = &PreheaderBB->getParent()->front();
1536 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1537 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1538 }
1539 return R;
1540}
1541
1542namespace {
1543
1544
1545
1546class KernelOperandInfo {
1552
1553public:
1559 while (isRegInLoop(MO)) {
1561 if (MI->isFullCopy()) {
1562 MO = &MI->getOperand(1);
1563 continue;
1564 }
1565 if (->isPHI())
1566 break;
1567
1568 if (IllegalPhis.count(MI)) {
1569 MO = &MI->getOperand(3);
1570 continue;
1571 }
1572
1574 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1575 : &MI->getOperand(3);
1577 }
1579 }
1580
1582 return PhiDefaults.size() == Other.PhiDefaults.size();
1583 }
1584
1586 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1587 << *Source->getParent();
1588 }
1589
1590private:
1593 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1594 }
1595};
1596}
1597
1603 else
1605 for (auto I = BB->begin(), NI = NewBB->begin(); ->isTerminator();
1606 ++I, ++NI) {
1611 }
1612 return NewBB;
1613}
1614
1616 int MinStage) {
1618 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1621 if (Stage == -1 || Stage >= MinStage)
1622 continue;
1623
1627
1628
1631 MI->getParent());
1633 }
1634 for (auto &Sub : Subs)
1635 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1637 }
1638 if (LIS)
1640 MI->eraseFromParent();
1641 }
1642}
1643
1650 if (MI.isPHI()) {
1651
1652
1654
1655
1656 Register PhiR = MI.getOperand(0).getReg();
1665 Remaps[PhiR] = NR;
1666 }
1667 }
1669 continue;
1670 MI.removeFromParent();
1671 DestBB->insert(InsertPt, &MI);
1674 BlockMIs.erase({SourceBB, KernelMI});
1675 }
1678 assert(MI.getNumOperands() == 3);
1680
1681
1682 if (getStage(Def) == Stage) {
1683 Register PhiReg = MI.getOperand(0).getReg();
1684 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1685 nullptr) != -1);
1687 MI.getOperand(0).setReg(PhiReg);
1689 }
1690 }
1691 for (auto *P : PhiToDelete)
1692 P->eraseFromParent();
1694
1695
1698 DestBB->insert(InsertPt, NewMI);
1699 Register OrigR = Phi->getOperand(0).getReg();
1704 Remaps[OrigR] = R;
1708 return R;
1709 };
1712 if (!MO.isReg())
1713 continue;
1714 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())
1715 MO.setReg(It->second);
1716 else {
1717
1718
1720 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1723 }
1724 }
1725 }
1726 }
1727}
1728
1735 for (unsigned I = 0; I < distance; ++I) {
1738 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1740 std::swap(LoopRegIdx, InitRegIdx);
1741 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1743 }
1744 return CanonicalUseReg;
1745}
1746
1752
1753
1754 LS.reset();
1756 LS[I] = true;
1760 }
1761
1762
1763
1764
1765
1766
1767
1769 EliminateDeadPhis(ExitingBB, MRI, LIS, true);
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1790
1791
1792 EliminateDeadPhis(B, MRI, LIS, true);
1795 }
1796 for (size_t I = 0; I < Epilogs.size(); I++) {
1797 LS.reset();
1798 for (size_t J = I; J < Epilogs.size(); J++) {
1799 int Iteration = J;
1801
1802 for (size_t K = Iteration; K > I; K--)
1804 LS[Stage] = true;
1805 }
1808 }
1809
1810
1811
1812
1813 auto PI = Prologs.begin();
1814 auto EI = Epilogs.begin();
1816 for (; PI != Prologs.end(); ++PI, ++EI) {
1818 (*PI)->addSuccessor(*EI);
1820 Register Reg = MI.getOperand(1).getReg();
1822 if (Use && Use->getParent() == Pred) {
1824 if (CanonicalUse->isPHI()) {
1825
1826
1827
1829 }
1831 }
1834 }
1835 }
1836
1837
1842
1843
1845 for (auto I = B->instr_rbegin();
1846 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1849 }
1850 }
1852 if (LIS)
1854 MI->eraseFromParent();
1855 }
1857
1858
1860 EliminateDeadPhis(B, MRI, LIS);
1861 EliminateDeadPhis(ExitingBB, MRI, LIS);
1862}
1863
1867 if (Exit == BB)
1869
1872
1873
1876 Register OldR = MI.getOperand(3).getReg();
1880 if (Use.getParent() != BB)
1883 Use->substituteRegister(OldR, R, 0,
1890 }
1892 Exit->replacePhiUsesWith(BB, NewBB);
1894
1898 (void)CanAnalyzeBr;
1899 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1904 return NewBB;
1905}
1906
1911 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, nullptr);
1913}
1914
1916 if (MI->isPHI()) {
1917
1918
1919 Register PhiR = MI->getOperand(0).getReg();
1920 Register R = MI->getOperand(3).getReg();
1922 if (RMIStage != -1 && [MI->getParent()].test(RMIStage))
1923 R = MI->getOperand(1).getReg();
1926
1927
1928 MI->getOperand(0).setReg(PhiR);
1930 return;
1931 }
1932
1934 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1936
1937 return;
1938
1942
1943
1946 MI->getParent());
1948 }
1949 for (auto &Sub : Subs)
1950 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1952 }
1953 if (LIS)
1955 MI->eraseFromParent();
1956}
1957
1959
1960 bool KernelDisposed = false;
1963 ++PI, ++EI, --TC) {
1969 std::optional StaticallyGreater =
1971 if (!StaticallyGreater) {
1972 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1973
1975 } else if (*StaticallyGreater == false) {
1976 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1977
1978
1979 Prolog->removeSuccessor(Fallthrough);
1981 P.removeOperand(2);
1982 P.removeOperand(1);
1983 }
1985 KernelDisposed = true;
1986 } else {
1987 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1988
1991 P.removeOperand(4);
1992 P.removeOperand(3);
1993 }
1994 }
1995 }
1996
1997 if (!KernelDisposed) {
2000 } else {
2002 }
2003}
2004
2007 KR.rewrite();
2008}
2009
2016
2020}
2021
2025
2026
2027
2028 std::string ScheduleDump;
2032
2033
2034
2035 assert(LIS && "Requires LiveIntervals!");
2040 if (!ExpandedKernel) {
2041
2043 return;
2044 }
2045
2047
2048
2050 KR.rewrite();
2052
2053
2054
2057 if (NI->isPHI())
2058 IllegalPhis.insert(&*NI);
2059 }
2060
2061
2062
2064 auto OI = ExpandedKernel->begin();
2066 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2067 while (OI->isPHI() || OI->isFullCopy())
2068 ++OI;
2069 while (NI->isPHI() || NI->isFullCopy())
2070 ++NI;
2071 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2072
2073 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2074 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2075 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2076 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2077 }
2078
2079 bool Failed = false;
2080 for (auto &OldAndNew : KOIs) {
2081 if (OldAndNew.first == OldAndNew.second)
2082 continue;
2084 errs() << "Modulo kernel validation error: [\n";
2085 errs() << " [golden] ";
2086 OldAndNew.first.print(errs());
2087 errs() << " ";
2088 OldAndNew.second.print(errs());
2089 errs() << "]\n";
2090 }
2091
2093 errs() << "Golden reference kernel:\n";
2095 errs() << "New kernel:\n";
2097 errs() << ScheduleDump;
2099 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2100 }
2101
2102
2103
2106}
2107
2110
2111
2113
2114 return NewMI;
2115}
2116
2117
2118
2119
2122 if (Exit->pred_size() == 1)
2123 return Exit;
2124
2127
2130 MF->insert(Loop->getIterator(), NewExit);
2131
2136 FBB = NewExit;
2137 else if (FBB == Loop)
2138 TBB = NewExit;
2139 else
2143 Loop->replaceSuccessor(Exit, NewExit);
2144 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2146
2147 Exit->replacePhiUsesWith(Loop, NewExit);
2148
2149 return NewExit;
2150}
2151
2152
2153
2154
2156 int RequiredTC,
2157 InstrMapTy &LastStage0Insts,
2161 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2162 LastStage0Insts);
2163
2165
2166
2170 } else {
2172 }
2173}
2174
2175
2176
2177
2178
2179
2180void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2255 assert(LoopInfo && "Must be able to analyze loop!");
2256
2257 calcNumUnroll();
2258
2264
2270
2272
2275
2279
2282
2283 Prolog->addSuccessor(NewKernel);
2284
2287
2288 Epilog->addSuccessor(NewPreheader);
2289 Epilog->addSuccessor(NewExit);
2290
2291 InstrMapTy LastStage0Insts;
2292 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2293 LastStage0Insts, *Prolog, *NewPreheader);
2294
2295
2296
2298 generateProlog(PrologVRMap);
2299 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2300 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2301}
2302
2303
2304void ModuloScheduleExpanderMVE::updateInstrUse(
2308
2309
2310
2311
2312
2313
2315 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2316 continue;
2317 int DiffStage = 0;
2318 Register OrigReg = UseMO.getReg();
2320 if (!DefInst || DefInst->getParent() != OrigKernel)
2321 continue;
2322 unsigned InitReg = 0;
2323 unsigned DefReg = OrigReg;
2324 if (DefInst->isPHI()) {
2325 ++DiffStage;
2326 unsigned LoopReg;
2327 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2328
2329 DefReg = LoopReg;
2330 DefInst = MRI.getVRegDef(LoopReg);
2331 }
2332 unsigned DefStageNum = Schedule.getStage(DefInst);
2333 DiffStage += StageNum - DefStageNum;
2335 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2336
2337 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2338 else if (!PrevVRMap)
2339
2340
2341 NewReg = InitReg;
2342 else
2343
2344
2345
2346
2347 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2348
2350 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2351 if (NRC)
2352 UseMO.setReg(NewReg);
2353 else {
2354 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2355 BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2356 SplitReg)
2358 UseMO.setReg(SplitReg);
2359 }
2360 }
2361}
2362
2363
2364
2367 unsigned InitVal, LoopVal;
2369 if (LoopVal == Reg)
2370 return Φ
2371 }
2372 return nullptr;
2373}
2374
2375
2376void ModuloScheduleExpanderMVE::generatePhi(
2381 int StageNum = Schedule.getStage(OrigMI);
2382 bool UsePrologReg;
2383 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2384 UsePrologReg = true;
2385 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2386 UsePrologReg = false;
2387 else
2388 return;
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2422 if (!DefMO.isReg() || DefMO.isDead())
2423 continue;
2424 Register OrigReg = DefMO.getReg();
2425 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2426 if (NewReg == KernelVRMap[UnrollNum].end())
2427 continue;
2429 if (UsePrologReg) {
2430 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2431 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2432 } else {
2434 if (!Phi)
2435 continue;
2436 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2437 }
2438
2440 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2442 TII->get(TargetOpcode::PHI), PhiReg)
2443 .addReg(NewReg->second)
2445 .addReg(CorrespondReg)
2447 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2448 }
2449}
2450
2453 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2454 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2455 Phi.getOperand(Idx).setReg(NewReg);
2456 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2457 return;
2458 }
2459 }
2460}
2461
2462
2463void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2468 E = MRI.use_end();
2471 if (O.getParent()->getParent() != OrigKernel &&
2472 O.getParent()->getParent() != Prolog &&
2473 O.getParent()->getParent() != NewKernel &&
2474 O.getParent()->getParent() != Epilog)
2476 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2478 }
2479
2480
2481
2482 if (!UsesAfterLoop.empty()) {
2483 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2485 TII->get(TargetOpcode::PHI), PhiReg)
2490
2493
2496 }
2497
2498
2499
2500 if (!LoopPhis.empty()) {
2502 unsigned InitReg, LoopReg;
2503 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2504 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2506 TII->get(TargetOpcode::PHI), NewInit)
2511 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2512 }
2513 }
2514}
2515
2516void ModuloScheduleExpanderMVE::generateProlog(
2518 PrologVRMap.clear();
2521 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2522 ++PrologNum) {
2524 if (MI->isPHI())
2525 continue;
2526 int StageNum = Schedule.getStage(MI);
2527 if (StageNum > PrologNum)
2528 continue;
2530 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2531 NewMIMap[NewMI] = {PrologNum, StageNum};
2532 Prolog->push_back(NewMI);
2533 }
2534 }
2535
2536 for (auto I : NewMIMap) {
2538 int PrologNum = I.second.first;
2539 int StageNum = I.second.second;
2540 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2541 }
2542
2544 dbgs() << "prolog:\n";
2546 });
2547}
2548
2549void ModuloScheduleExpanderMVE::generateKernel(
2552 KernelVRMap.clear();
2553 KernelVRMap.resize(NumUnroll);
2555 PhiVRMap.resize(NumUnroll);
2558 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2560 if (MI->isPHI())
2561 continue;
2562 int StageNum = Schedule.getStage(MI);
2564 if (UnrollNum == NumUnroll - 1)
2565 LastStage0Insts[MI] = NewMI;
2566 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2567 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2568 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2569 NewMIMap[NewMI] = {UnrollNum, StageNum};
2571 }
2572 }
2573
2574 for (auto I : NewMIMap) {
2576 int UnrollNum = I.second.first;
2577 int StageNum = I.second.second;
2578 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2579 }
2580
2581
2582 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2583 *Epilog);
2584
2586 dbgs() << "kernel:\n";
2587 NewKernel->dump();
2588 });
2589}
2590
2591void ModuloScheduleExpanderMVE::generateEpilog(
2594 EpilogVRMap.clear();
2597 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2598 ++EpilogNum) {
2600 if (MI->isPHI())
2601 continue;
2602 int StageNum = Schedule.getStage(MI);
2603 if (StageNum <= EpilogNum)
2604 continue;
2606 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2607 NewMIMap[NewMI] = {EpilogNum, StageNum};
2608 Epilog->push_back(NewMI);
2609 }
2610 }
2611
2612 for (auto I : NewMIMap) {
2614 int EpilogNum = I.second.first;
2615 int StageNum = I.second.second;
2616 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2617 }
2618
2619
2620
2621
2622
2623 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2624
2626 dbgs() << "epilog:\n";
2628 });
2629}
2630
2631
2632void ModuloScheduleExpanderMVE::calcNumUnroll() {
2634 NumUnroll = 1;
2637
2639 if (MI->isPHI())
2640 continue;
2641 int StageNum = Schedule.getStage(MI);
2644 continue;
2647 continue;
2648
2649 int NumUnrollLocal = 1;
2651 ++NumUnrollLocal;
2652
2653
2655 }
2656 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2657 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2658 --NumUnrollLocal;
2659 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2660 }
2661 }
2662 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2663}
2664
2665
2666
2667
2668void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2669 ValueMapTy &VRMap,
2670 bool LastDef) {
2673 continue;
2676 Register NewReg = MRI.createVirtualRegister(RC);
2678 VRMap[Reg] = NewReg;
2679 if (LastDef)
2680 mergeRegUsesAfterPipeline(Reg, NewReg);
2681 }
2682}
2683
2688
2690
2691 generatePipelinedLoop();
2692}
2693
2694
2696 if (!L.getExitBlock()) {
2697 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2698 return false;
2699 }
2700
2703
2704
2705
2708
2709
2713 if (Ref.getParent() != BB || Ref.isPHI()) {
2714 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2715 "referenced outside of the loop or by phi.\n");
2716 return false;
2717 }
2718
2719
2720
2721
2722 unsigned InitVal, LoopVal;
2724 if ((LoopVal).isVirtual() ||
2725 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2727 dbgs() << "Can not apply MVE expander: A phi source value coming "
2728 "from the loop is not defined in the loop.\n");
2729 return false;
2730 }
2731 if (UsedByPhi.count(LoopVal)) {
2732 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2733 "loop is referenced by two or more phis.\n");
2734 return false;
2735 }
2736 UsedByPhi.insert(LoopVal);
2737 }
2738
2739 return true;
2740}
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755namespace {
2757public:
2758 static char ID;
2759
2762 }
2763
2764 bool runOnMachineFunction(MachineFunction &MF) override;
2766
2767 void getAnalysisUsage(AnalysisUsage &AU) const override {
2771 }
2772};
2773}
2774
2775char ModuloScheduleTest::ID = 0;
2776
2778 "Modulo Schedule test pass", false, false)
2783
2784bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2785 MachineLoopInfo &MLI = getAnalysis().getLI();
2786 for (auto *L : MLI) {
2787 if (L->getTopBlock() != L->getBottomBlock())
2788 continue;
2789 runOnLoop(MF, *L);
2790 return false;
2791 }
2792 return false;
2793}
2794
2796 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2797 std::pair<StringRef, StringRef> StageTokenAndValue =
2798 getToken(StageAndCycle.first, "-");
2799 std::pair<StringRef, StringRef> CycleTokenAndValue =
2800 getToken(StageAndCycle.second, "-");
2801 if (StageTokenAndValue.first != "Stage" ||
2802 CycleTokenAndValue.first != "_Cycle") {
2804 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2805 return;
2806 }
2807
2808 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2809 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2810
2811 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2812}
2813
2815 LiveIntervals &LIS = getAnalysis().getLIS();
2817 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2818
2820 std::vector<MachineInstr *> Instrs;
2822 if (MI.isTerminator())
2823 continue;
2824 Instrs.push_back(&MI);
2826 dbgs() << "Parsing post-instr symbol for " << MI;
2828 }
2829 }
2830
2832 std::move(Stage));
2835 MSE.expand();
2836 MSE.cleanup();
2837}
2838
2839
2840
2841
2842
2849 MI->setPostInstrSymbol(MF, Sym);
2850 }
2851}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::optional< std::vector< StOtherPiece > > Other
DenseMap< Block *, BlockRelaxAux > Blocks
global merge Global merge function pass
const HexagonInstrInfo * TII
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)
Create a dedicated exit for Loop.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Implements a dense probed hash-table based set.
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
succ_iterator succ_begin()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
pred_iterator pred_begin()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
ModuloSchedule & Schedule
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
const TargetInstrInfo * TII
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
MachineRegisterInfo & MRI
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
reverse_iterator rbegin()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt copy(R &&Range, OutputIt Out)
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...