LLVM: lib/CodeGen/ModuloSchedule.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
22
23#define DEBUG_TYPE "pipeliner"
24using namespace llvm;
25
28 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
29
34
35
36
37
38
39
40
43 assert(Phi.isPHI() && "Expecting a Phi.");
44
47 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
48 if (Phi.getOperand(i + 1).getMBB() != Loop)
49 InitVal = Phi.getOperand(i).getReg();
50 else
51 LoopVal = Phi.getOperand(i).getReg();
52
53 assert(InitVal && LoopVal && "Unexpected Phi structure.");
54}
55
56
58 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
59 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
60 return Phi.getOperand(i).getReg();
62}
63
64
66 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
67 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
68 return Phi.getOperand(i).getReg();
70}
71
73 BB = Schedule.getLoop()->getTopBlock();
74 Preheader = *BB->pred_begin();
75 if (Preheader == BB)
76 Preheader = *std::next(BB->pred_begin());
77
78
79
81 int DefStage = Schedule.getStage(MI);
84 unsigned MaxDiff = 0;
85 bool PhiIsSwapped = false;
88 int UseStage = Schedule.getStage(UseMI);
89 unsigned Diff = 0;
90 if (UseStage != -1 && UseStage >= DefStage)
91 Diff = UseStage - DefStage;
92 if (MI->isPHI()) {
93 if (isLoopCarried(*MI))
94 ++Diff;
95 else
96 PhiIsSwapped = true;
97 }
98 MaxDiff = std::max(Diff, MaxDiff);
99 }
100 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
101 }
102 }
103
104 generatePipelinedLoop();
105}
106
107void ModuloScheduleExpander::generatePipelinedLoop() {
108 LoopInfo = TII->analyzeLoopForPipelining(BB);
110
111
113
114 unsigned MaxStageCount = Schedule.getNumStages() - 1;
115
116
117
118
119
120 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
121
122
123
124
125 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
126
127 InstrMapTy InstrMap;
128
130
131
132 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
135
136
137
139 if (CI->isPHI())
140 continue;
141 unsigned StageNum = Schedule.getStage(CI);
143 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
146 InstrMap[NewMI] = CI;
147 }
148
149
150
153 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
156 InstrMap[NewMI] = &MI;
157 }
158
159 NewKernel = KernelBB;
162
163 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
164 InstrMap, MaxStageCount, MaxStageCount, false);
165 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
166 InstrMap, MaxStageCount, MaxStageCount, false);
167
169
170 SmallVector<MachineBasicBlock *, 4> EpilogBBs;
171
172 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
173 PrologBBs);
174
175
176
177 splitLifetimes(KernelBB, EpilogBBs);
178
179
180 removeDeadInstructions(KernelBB, EpilogBBs);
181
182
183 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
184
185 delete[] VRMap;
186 delete[] VRMapPhi;
187}
188
190
191 for (auto &I : *BB)
192 LIS.RemoveMachineInstrFromMaps(I);
193 BB->clear();
194 BB->eraseFromParent();
195}
196
197
198void ModuloScheduleExpander::generateProlog(unsigned LastStage,
200 ValueMapTy *VRMap,
201 MBBVectorTy &PrologBBs) {
203 InstrMapTy InstrMap;
204
205
206
207
208 for (unsigned i = 0; i < LastStage; ++i) {
209
210
216 PredBB = NewBB;
218
219
220
221 for (int StageNum = i; StageNum >= 0; --StageNum) {
224 BBI != BBE; ++BBI) {
225 if (Schedule.getStage(&*BBI) == StageNum) {
226 if (BBI->isPHI())
227 continue;
229 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
230 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
233 InstrMap[NewMI] = &*BBI;
234 }
235 }
236 }
237 rewritePhiValues(NewBB, i, VRMap, InstrMap);
239 dbgs() << "prolog:\n";
240 NewBB->dump();
241 });
242 }
243
245
246
247
248 unsigned numBranches = TII->removeBranch(*Preheader);
249 if (numBranches) {
251 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
252 }
253}
254
255
256
257
258void ModuloScheduleExpander::generateEpilog(
260 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
261 MBBVectorTy &PrologBBs) {
262
263
264 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
266 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
267 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
268 if (checkBranch)
269 return;
270
272 if (*LoopExitI == KernelBB)
273 ++LoopExitI;
274 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
275 MachineBasicBlock *LoopExitBB = *LoopExitI;
276
277 MachineBasicBlock *PredBB = KernelBB;
278 MachineBasicBlock *EpilogStart = LoopExitBB;
279 InstrMapTy InstrMap;
280
281
282
283
284 int EpilogStage = LastStage + 1;
285 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
286 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
288 MF.insert(BB->getIterator(), NewBB);
289
292 LIS.insertMBBInMaps(NewBB);
293
294 if (EpilogStart == LoopExitBB)
295 EpilogStart = NewBB;
296
297
298
299 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
300 for (auto &BBI : *BB) {
301 if (BBI.isPHI())
302 continue;
303 MachineInstr *In = &BBI;
304 if ((unsigned)Schedule.getStage(In) == StageNum) {
305
306
307 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
308 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
310 LIS.InsertMachineInstrInMaps(*NewMI);
311 InstrMap[NewMI] = In;
312 }
313 }
314 }
315 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
316 InstrMap, LastStage, EpilogStage, i == 1);
317 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
318 InstrMap, LastStage, EpilogStage, i == 1);
319 PredBB = NewBB;
320
322 dbgs() << "epilog:\n";
323 NewBB->dump();
324 });
325 }
326
327
329
330
331
332 TII->removeBranch(*KernelBB);
333 assert((OrigBB == TBB || OrigBB == FBB) &&
334 "Unable to determine looping branch direction");
335 if (OrigBB != TBB)
336 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
337 else
338 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
339
340 if (EpilogBBs.size() > 0) {
341 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
343 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
344 }
345}
346
347
348
354 if (O.getParent()->getParent() != MBB)
355 O.setReg(ToReg);
356}
357
358
359
363 if (MO.getParent()->getParent() != BB)
364 return true;
365 return false;
366}
367
368
369
370
371void ModuloScheduleExpander::generateExistingPhis(
373 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
374 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
375
376
377
378 unsigned PrologStage = 0;
379 unsigned PrevStage = 0;
380 bool InKernel = (LastStageNum == CurStageNum);
381 if (InKernel) {
382 PrologStage = LastStageNum - 1;
383 PrevStage = CurStageNum;
384 } else {
385 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
386 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
387 }
388
390 BBE = BB->getFirstNonPHI();
391 BBI != BBE; ++BBI) {
392 Register Def = BBI->getOperand(0).getReg();
393
396 getPhiRegs(*BBI, BB, InitVal, LoopVal);
397
399
400
402 if (auto It = VRMap[LastStageNum].find(LoopVal);
403 It != VRMap[LastStageNum].end())
404 PhiOp2 = It->second;
405
406 int StageScheduled = Schedule.getStage(&*BBI);
407 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
408 unsigned NumStages = getStagesForReg(Def, CurStageNum);
409 if (NumStages == 0) {
410
411
412 Register NewReg = VRMap[PrevStage][LoopVal];
413 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
414 InitVal, NewReg);
415 auto It = VRMap[CurStageNum].find(LoopVal);
416 if (It != VRMap[CurStageNum].end()) {
418 VRMap[CurStageNum][Def] = Reg;
419 }
420 }
421
422
423
424
425 unsigned MaxPhis = PrologStage + 2;
426 if (!InKernel && (int)PrologStage <= LoopValStage)
427 MaxPhis = std::max((int)MaxPhis - LoopValStage, 1);
428 unsigned NumPhis = std::min(NumStages, MaxPhis);
429
431 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
432
433
434
435
436
437 int StageDiff = 0;
438 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
439 NumPhis == 1)
440 StageDiff = 1;
441
442
443 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
444 StageDiff = StageScheduled - LoopValStage;
445 for (unsigned np = 0; np < NumPhis; ++np) {
446
447
448
449 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
450 PhiOp1 = InitVal;
451
452 else if (PrologStage >= AccessStage + StageDiff + np &&
453 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
454 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
455
456
457 else if (PrologStage >= AccessStage + StageDiff + np) {
458
459
460 PhiOp1 = LoopVal;
461 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
462 int Indirects = 1;
463 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
464 int PhiStage = Schedule.getStage(InstOp1);
465 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
467 else
469 InstOp1 = MRI.getVRegDef(PhiOp1);
470 int PhiOpStage = Schedule.getStage(InstOp1);
471 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
472 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np) {
473 auto &M = VRMap[PrologStage - StageAdj - Indirects - np];
474 if (auto It = M.find(PhiOp1); It != M.end()) {
475 PhiOp1 = It->second;
476 break;
477 }
478 }
479 ++Indirects;
480 }
481 } else
482 PhiOp1 = InitVal;
483
484
485 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
486 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
488
489 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
490 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
491
492
493
494 if (!InKernel) {
495 int StageDiffAdj = 0;
496 if (LoopValStage != -1 && StageScheduled > LoopValStage)
497 StageDiffAdj = StageScheduled - LoopValStage;
498
499
500 if (np == 0 && PrevStage == LastStageNum &&
501 (StageScheduled != 0 || LoopValStage != 0) &&
502 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
503 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
504
505
506 else if (np > 0 && PrevStage == LastStageNum &&
507 VRMap[PrevStage - np + 1].count(Def))
508 PhiOp2 = VRMap[PrevStage - np + 1][Def];
509
510 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
511 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
512 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
513
514
515 else if (VRMap[PrevStage - np].count(Def) &&
516 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
517 (LoopValStage == StageScheduled)))
518 PhiOp2 = VRMap[PrevStage - np][Def];
519 }
520
521
522
523
524
525 if (LoopDefIsPhi) {
526 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
527 int LVNumStages = getStagesForPhi(LoopVal);
528 int StageDiff = (StageScheduled - LoopValStage);
529 LVNumStages -= StageDiff;
530
531 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
532 NewReg = PhiOp2;
533 unsigned ReuseStage = CurStageNum;
534 if (isLoopCarried(*PhiInst))
535 ReuseStage -= LVNumStages;
536
537
538 if (VRMap[ReuseStage - np].count(LoopVal)) {
539 NewReg = VRMap[ReuseStage - np][LoopVal];
540
541 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
542 Def, NewReg);
543
544 VRMap[CurStageNum - np][Def] = NewReg;
545 PhiOp2 = NewReg;
546 if (VRMap[LastStageNum - np - 1].count(LoopVal))
547 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
548
549 if (IsLast && np == NumPhis - 1)
551 continue;
552 }
553 }
554 }
555 if (InKernel && StageDiff > 0 &&
556 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
557 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
558 }
559
560 const TargetRegisterClass *RC = MRI.getRegClass(Def);
561 NewReg = MRI.createVirtualRegister(RC);
562
563 MachineInstrBuilder NewPhi =
565 TII->get(TargetOpcode::PHI), NewReg);
568 LIS.InsertMachineInstrInMaps(*NewPhi);
569 if (np == 0)
570 InstrMap[NewPhi] = &*BBI;
571
572
573
574
576 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
577 PrevReg = VRMap[PrevStage - np][LoopVal];
578 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
579 NewReg, PrevReg);
580
581 if (VRMap[CurStageNum - np].count(Def)) {
583 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
584 NewReg);
585 }
586
587
588
589
590 if (IsLast && np == NumPhis - 1)
592
593
594 if (InKernel)
595 PhiOp2 = NewReg;
596
597
598 VRMap[CurStageNum - np][Def] = NewReg;
599 }
600
601 while (NumPhis++ < NumStages) {
602 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
603 NewReg, 0);
604 }
605
606
607
608 if (NumStages == 0 && IsLast) {
609 auto &CurStageMap = VRMap[CurStageNum];
610 auto It = CurStageMap.find(LoopVal);
611 if (It != CurStageMap.end())
613 }
614 }
615}
616
617
618
619
620void ModuloScheduleExpander::generatePhis(
622 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
623 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
624 bool IsLast) {
625
626
627 unsigned PrologStage = 0;
628 unsigned PrevStage = 0;
629 unsigned StageDiff = CurStageNum - LastStageNum;
630 bool InKernel = (StageDiff == 0);
631 if (InKernel) {
632 PrologStage = LastStageNum - 1;
633 PrevStage = CurStageNum;
634 } else {
635 PrologStage = LastStageNum - StageDiff;
636 PrevStage = LastStageNum + StageDiff - 1;
637 }
638
640 BBE = BB->instr_end();
641 BBI != BBE; ++BBI) {
642 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
643 MachineOperand &MO = BBI->getOperand(i);
645 continue;
646
647 int StageScheduled = Schedule.getStage(&*BBI);
648 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
650 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
651
652
653
654 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
656 NumPhis = 1;
657 if (!InKernel && (unsigned)StageScheduled > PrologStage)
658 continue;
659
661 if (InKernel) {
662 PhiOp2 = VRMap[PrevStage][Def];
663 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
664 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
666 }
667
668
669 if (NumPhis > PrologStage + 1 - StageScheduled)
670 NumPhis = PrologStage + 1 - StageScheduled;
671 for (unsigned np = 0; np < NumPhis; ++np) {
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694 Register PhiOp1 = VRMap[PrologStage][Def];
695 if (np <= PrologStage)
696 PhiOp1 = VRMap[PrologStage - np][Def];
697 if (!InKernel) {
698 if (PrevStage == LastStageNum && np == 0)
699 PhiOp2 = VRMap[LastStageNum][Def];
700 else
701 PhiOp2 = VRMapPhi[PrevStage - np][Def];
702 }
703
704 const TargetRegisterClass *RC = MRI.getRegClass(Def);
705 Register NewReg = MRI.createVirtualRegister(RC);
706
707 MachineInstrBuilder NewPhi =
709 TII->get(TargetOpcode::PHI), NewReg);
712 LIS.InsertMachineInstrInMaps(*NewPhi);
713 if (np == 0)
714 InstrMap[NewPhi] = &*BBI;
715
716
717
718 if (InKernel) {
719 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
720 NewReg);
721 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
722 NewReg);
723
724 PhiOp2 = NewReg;
725 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
726 } else {
727 VRMapPhi[CurStageNum - np][Def] = NewReg;
728 if (np == NumPhis - 1)
729 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
730 NewReg);
731 }
732 if (IsLast && np == NumPhis - 1)
734 }
735 }
736 }
737}
738
739
740
741
742
743void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
744 MBBVectorTy &EpilogBBs) {
745
746
750 MI != ME;) {
751
752 if (MI->isInlineAsm()) {
753 ++MI;
754 continue;
755 }
756 bool SawStore = false;
757
758
759 if (->isSafeToMove(SawStore) &&
->isPHI()) {
760 ++MI;
761 continue;
762 }
763 bool used = true;
764 for (const MachineOperand &MO : MI->all_defs()) {
766
769 if (used)
770 break;
771 continue;
772 }
773 unsigned realUses = 0;
774 for (const MachineOperand &U : MRI.use_operands(reg)) {
775
776
777 if (U.getParent()->getParent() != BB) {
778 realUses++;
779 used = true;
780 break;
781 }
782 }
783 if (realUses > 0)
784 break;
785 used = false;
786 }
787 if (!used) {
788 LIS.RemoveMachineInstrFromMaps(*MI);
789 MI++->eraseFromParent();
790 continue;
791 }
792 ++MI;
793 }
794
795
797 Register reg = MI.getOperand(0).getReg();
798 if (MRI.use_begin(reg) == MRI.use_end()) {
799 LIS.RemoveMachineInstrFromMaps(MI);
800 MI.eraseFromParent();
801 }
802 }
803}
804
805
806
807
808
809
810
811
812
813
814
815void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
816 MBBVectorTy &EpilogBBs) {
817 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
818 for (auto &PHI : KernelBB->phis()) {
820
821
823 E = MRI.use_instr_end();
825 if (I->isPHI() && I->getParent() == KernelBB) {
826
828 if (!LCDef)
829 continue;
830 MachineInstr *MI = MRI.getVRegDef(LCDef);
831 if ( || MI->getParent() != KernelBB || MI->isPHI())
832 continue;
833
834
838 if (BBJ.readsRegister(Def, nullptr)) {
839
840 if (!SplitReg) {
841 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
842 MachineInstr *newCopy =
843 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
844 TII->get(TargetOpcode::COPY), SplitReg)
846 LIS.InsertMachineInstrInMaps(*newCopy);
847 }
848 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
849 }
850 if (!SplitReg)
851 continue;
852
853 for (auto &Epilog : EpilogBBs)
855 if (I.readsRegister(Def, nullptr))
856 I.substituteRegister(Def, SplitReg, 0, *TRI);
857 break;
858 }
859 }
860 }
861}
862
863
864
865
866void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
867 MBBVectorTy &PrologBBs,
869 MBBVectorTy &EpilogBBs,
870 ValueMapTy *VRMap) {
871 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
872 MachineBasicBlock *LastPro = KernelBB;
873 MachineBasicBlock *LastEpi = KernelBB;
874
875
876
877 unsigned MaxIter = PrologBBs.size() - 1;
878 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
879
880
881 MachineBasicBlock *Prolog = PrologBBs[j];
882 MachineBasicBlock *Epilog = EpilogBBs[i];
883
885 std::optional StaticallyGreater =
886 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
887 unsigned numAdded = 0;
888 if (!StaticallyGreater) {
891 } else if (*StaticallyGreater == false) {
893 Prolog->removeSuccessor(LastPro);
896 Epilog->removePHIsIncomingValuesForPredecessor(*LastEpi);
897
898 if (LastPro != LastEpi) {
899 for (auto &MI : *LastEpi)
900 LIS.RemoveMachineInstrFromMaps(MI);
901 LastEpi->clear();
902 LastEpi->eraseFromParent();
903 }
904 if (LastPro == KernelBB) {
905 LoopInfo->disposed(&LIS);
906 NewKernel = nullptr;
907 }
908 for (auto &MI : *LastPro)
909 LIS.RemoveMachineInstrFromMaps(MI);
910 LastPro->clear();
911 LastPro->eraseFromParent();
912 } else {
913 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
914 Epilog->removePHIsIncomingValuesForPredecessor(*Prolog);
915 }
920 I != E && numAdded > 0; ++I, --numAdded)
921 updateInstruction(&*I, false, j, 0, VRMap);
922 }
923
924 if (NewKernel) {
925 LoopInfo->setPreheader(PrologBBs[MaxIter]);
926 LoopInfo->adjustTripCount(-(MaxIter + 1));
927 }
928}
929
930
931
932bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
933 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
934 const MachineOperand *BaseOp;
936 bool OffsetIsScalable;
937 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
938 return false;
939
940
941 if (OffsetIsScalable)
942 return false;
943
944 if (!BaseOp->isReg())
945 return false;
946
948
949 MachineRegisterInfo &MRI = MF.getRegInfo();
950
951 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
952 if (BaseDef && BaseDef->isPHI()) {
954 BaseDef = MRI.getVRegDef(BaseReg);
955 }
956 if (!BaseDef)
957 return false;
958
959 int D = 0;
960 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
961 return false;
962
963 Delta = D;
964 return true;
965}
966
967
968
969
970void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
972 unsigned Num) {
973 if (Num == 0)
974 return;
975
976
978 return;
980 for (MachineMemOperand *MMO : NewMI.memoperands()) {
981
982 if (MMO->isVolatile() || MMO->isAtomic() ||
983 (MMO->isInvariant() && MMO->isDereferenceable()) ||
984 (!MMO->getValue())) {
986 continue;
987 }
988 unsigned Delta;
989 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
990 int64_t AdjOffset = Delta * Num;
992 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
993 } else {
994 NewMMOs.push_back(MF.getMachineMemOperand(
996 }
997 }
999}
1000
1001
1002
1004 unsigned CurStageNum,
1005 unsigned InstStageNum) {
1006 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1007 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1008 return NewMI;
1009}
1010
1011
1012
1013
1014MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1015 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1016 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1017 auto It = InstrChanges.find(OldMI);
1018 if (It != InstrChanges.end()) {
1019 std::pair<Register, int64_t> RegAndOffset = It->second;
1020 unsigned BasePos, OffsetPos;
1021 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1022 return nullptr;
1024 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1025 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1026 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1028 }
1029 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1030 return NewMI;
1031}
1032
1033
1034
1035void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1036 bool LastDef,
1037 unsigned CurStageNum,
1038 unsigned InstrStageNum,
1039 ValueMapTy *VRMap) {
1040 for (MachineOperand &MO : NewMI->operands()) {
1042 continue;
1044 if (MO.isDef()) {
1045
1046 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1047 Register NewReg = MRI.createVirtualRegister(RC);
1049 VRMap[CurStageNum][reg] = NewReg;
1050 if (LastDef)
1052 } else if (MO.isUse()) {
1053 MachineInstr *Def = MRI.getVRegDef(reg);
1054
1055 int DefStageNum = Schedule.getStage(Def);
1056 unsigned StageNum = CurStageNum;
1057 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1058
1059 unsigned StageDiff = (InstrStageNum - DefStageNum);
1060
1061 StageNum -= StageDiff;
1062 }
1063 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())
1064 MO.setReg(It->second);
1065 }
1066 }
1067}
1068
1069
1070
1071
1073 SmallPtrSet<MachineInstr *, 8> Visited;
1074 MachineInstr *Def = MRI.getVRegDef(Reg);
1075 while (Def->isPHI()) {
1076 if (!Visited.insert(Def).second)
1077 break;
1078 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1079 if (Def->getOperand(i + 1).getMBB() == BB) {
1080 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1081 break;
1082 }
1083 }
1084 return Def;
1085}
1086
1087
1088Register ModuloScheduleExpander::getPrevMapVal(
1089 unsigned StageNum, unsigned PhiStage, Register LoopVal, unsigned LoopStage,
1092 if (StageNum > PhiStage) {
1093 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1094 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1095
1096 PrevVal = VRMap[StageNum - 1][LoopVal];
1097 else if (VRMap[StageNum].count(LoopVal))
1098
1099
1100 PrevVal = VRMap[StageNum][LoopVal];
1101 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1102
1103 PrevVal = LoopVal;
1104 else if (StageNum == PhiStage + 1)
1105
1107 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1108
1109 PrevVal =
1110 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1111 LoopStage, VRMap, BB);
1112 }
1113 return PrevVal;
1114}
1115
1116
1117
1118
1119
1120void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1121 unsigned StageNum,
1122 ValueMapTy *VRMap,
1123 InstrMapTy &InstrMap) {
1124 for (auto &PHI : BB->phis()) {
1128 Register PhiDef = PHI.getOperand(0).getReg();
1129
1130 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1131 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1132 unsigned NumPhis = getStagesForPhi(PhiDef);
1133 if (NumPhis > StageNum)
1134 NumPhis = StageNum;
1135 for (unsigned np = 0; np <= NumPhis; ++np) {
1137 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1138 if (!NewVal)
1139 NewVal = InitVal;
1140 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1141 NewVal);
1142 }
1143 }
1144}
1145
1146
1147
1148
1149void ModuloScheduleExpander::rewriteScheduledInstr(
1150 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1153 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1154 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1155
1156
1157 for (MachineOperand &UseOp :
1159 MachineInstr *UseMI = UseOp.getParent();
1161 continue;
1164 continue;
1166 continue;
1167 }
1169 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1170 MachineInstr *OrigMI = OrigInstr->second;
1171 int StageSched = Schedule.getStage(OrigMI);
1172 int CycleSched = Schedule.getCycle(OrigMI);
1174
1175 if (StagePhi == StageSched && Phi->isPHI()) {
1176 int CyclePhi = Schedule.getCycle(Phi);
1177 if (PrevReg && InProlog)
1178 ReplaceReg = PrevReg;
1179 else if (PrevReg && !isLoopCarried(*Phi) &&
1180 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1181 ReplaceReg = PrevReg;
1182 else
1183 ReplaceReg = NewReg;
1184 }
1185
1186
1187 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1188 ReplaceReg = NewReg;
1189 if (StagePhi > StageSched && Phi->isPHI())
1190 ReplaceReg = NewReg;
1191 if (!InProlog && ->isPHI() && StagePhi < StageSched)
1192 ReplaceReg = NewReg;
1193 if (ReplaceReg) {
1194 const TargetRegisterClass *NRC =
1195 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1196 if (NRC)
1197 UseOp.setReg(ReplaceReg);
1198 else {
1199 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1201 TII->get(TargetOpcode::COPY), SplitReg)
1202 .addReg(ReplaceReg);
1203 UseOp.setReg(SplitReg);
1204 LIS.InsertMachineInstrInMaps(*newCopy);
1205 }
1206 }
1207 }
1208}
1209
1210bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1211 if (.isPHI())
1212 return false;
1213 int DefCycle = Schedule.getCycle(&Phi);
1214 int DefStage = Schedule.getStage(&Phi);
1215
1218 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1219 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1220 if (!Use || Use->isPHI())
1221 return true;
1222 int LoopCycle = Schedule.getCycle(Use);
1223 int LoopStage = Schedule.getStage(Use);
1224 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1225}
1226
1227
1228
1229
1230
1231
1232
1233
1234namespace {
1235
1236
1238 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1244 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1245 if (LIS)
1247 MI.eraseFromParent();
1249 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1251 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1252 MRI.getRegClass(MI.getOperand(0).getReg()));
1253 assert(ConstrainRegClass &&
1254 "Expected a valid constrained register class!");
1255 (void)ConstrainRegClass;
1256 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1257 MI.getOperand(1).getReg());
1258 if (LIS)
1260 MI.eraseFromParent();
1262 }
1263 }
1264 }
1265}
1266
1267
1268
1269class KernelRewriter {
1270 ModuloSchedule &S;
1271 MachineBasicBlock *BB;
1272 MachineBasicBlock *PreheaderBB, *ExitBB;
1273 MachineRegisterInfo &MRI;
1274 const TargetInstrInfo *TII;
1275 LiveIntervals *LIS;
1276
1277
1278 DenseMap<const TargetRegisterClass *, Register> Undefs;
1279
1280
1281 DenseMap<std::pair<Register, Register>, Register> Phis;
1282
1283 DenseMap<Register, Register> UndefPhis;
1284
1285
1286
1288
1289
1290
1292 const TargetRegisterClass *RC = nullptr);
1293
1294 Register undef(const TargetRegisterClass *RC);
1295
1296public:
1297 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1298 LiveIntervals *LIS = nullptr);
1299 void rewrite();
1300};
1301}
1302
1305 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1306 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1307 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1309 if (PreheaderBB == BB)
1310 PreheaderBB = *std::next(BB->pred_begin());
1311}
1312
1313void KernelRewriter::rewrite() {
1314
1315
1316
1317
1319 MachineInstr *FirstMI = nullptr;
1321 if (MI->isPHI())
1322 continue;
1323 if (MI->getParent())
1324 MI->removeFromParent();
1326 if (!FirstMI)
1327 FirstMI = MI;
1328 }
1329 assert(FirstMI && "Failed to find first MI in schedule");
1330
1331
1332
1334 if (LIS)
1336 (I++)->eraseFromParent();
1337 }
1338
1339
1340 for (MachineInstr &MI : *BB) {
1341 if (MI.isPHI() || MI.isTerminator())
1342 continue;
1343 for (MachineOperand &MO : MI.uses()) {
1345 continue;
1348 }
1349 }
1350 EliminateDeadPhis(BB, MRI, LIS);
1351
1352
1353
1354
1355
1356 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1357 if (MI->isPHI()) {
1358 Register R = MI->getOperand(0).getReg();
1360 continue;
1361 }
1362
1363 for (MachineOperand &Def : MI->defs()) {
1364 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1365 if (MI.getParent() != BB) {
1367 break;
1368 }
1369 }
1370 }
1371 }
1372}
1373
1376 if (!Producer)
1377 return Reg;
1378
1379 int ConsumerStage = S.getStage(&MI);
1381
1382
1383 if (Producer->getParent() != BB)
1384
1385 return Reg;
1386 int ProducerStage = S.getStage(Producer);
1387 assert(ConsumerStage != -1 &&
1388 "In-loop consumer should always be scheduled!");
1389 assert(ConsumerStage >= ProducerStage);
1390 unsigned StageDiff = ConsumerStage - ProducerStage;
1391
1392 for (unsigned I = 0; I < StageDiff; ++I)
1394 return Reg;
1395 }
1396
1397
1398
1401 auto LoopProducer = Producer;
1402 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1405 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1406 assert(LoopProducer);
1407 }
1408 int LoopProducerStage = S.getStage(LoopProducer);
1409
1410 std::optional IllegalPhiDefault;
1411
1412 if (LoopProducerStage == -1) {
1413
1414 } else if (LoopProducerStage > ConsumerStage) {
1415
1416
1417
1418
1419#ifndef NDEBUG
1420 int LoopProducerCycle = S.getCycle(LoopProducer);
1421 int ConsumerCycle = S.getCycle(&MI);
1422#endif
1423 assert(LoopProducerCycle <= ConsumerCycle);
1424 assert(LoopProducerStage == ConsumerStage + 1);
1425
1426
1427
1428
1429
1430
1431 IllegalPhiDefault = Defaults.front();
1433 } else {
1434 assert(ConsumerStage >= LoopProducerStage);
1435 int StageDiff = ConsumerStage - LoopProducerStage;
1436 if (StageDiff > 0) {
1437 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1438 << " to " << (Defaults.size() + StageDiff) << "\n");
1439
1440
1441
1442 Defaults.resize(Defaults.size() + StageDiff,
1443 Defaults.empty() ? std::optional()
1444 : Defaults.back());
1445 }
1446 }
1447
1448
1449 auto DefaultI = Defaults.rbegin();
1450 while (DefaultI != Defaults.rend())
1451 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1452
1453 if (IllegalPhiDefault) {
1454
1455
1456
1457
1458
1459 auto RC = MRI.getRegClass(Reg);
1461 MachineInstr *IllegalPhi =
1463 .addReg(*IllegalPhiDefault)
1464 .addMBB(PreheaderBB)
1466 .addMBB(BB);
1467
1468
1469 S.setStage(IllegalPhi, LoopProducerStage);
1470 return R;
1471 }
1472
1473 return LoopReg;
1474}
1475
1476Register KernelRewriter::phi(Register LoopReg, std::optional InitReg,
1477 const TargetRegisterClass *RC) {
1478
1479 if (InitReg) {
1480 auto I = Phis.find({LoopReg, *InitReg});
1481 if (I != Phis.end())
1482 return I->second;
1483 } else {
1484 for (auto &KV : Phis) {
1485 if (KV.first.first == LoopReg)
1486 return KV.second;
1487 }
1488 }
1489
1490
1491
1492 auto I = UndefPhis.find(LoopReg);
1493 if (I != UndefPhis.end()) {
1495 if (!InitReg)
1496
1497
1498 return R;
1499
1500 MachineInstr *MI = MRI.getVRegDef(R);
1501 MI->getOperand(1).setReg(*InitReg);
1502 Phis.insert({{LoopReg, *InitReg}, R});
1503 const TargetRegisterClass *ConstrainRegClass =
1504 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1505 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1506 (void)ConstrainRegClass;
1508 return R;
1509 }
1510
1511
1512 if (!RC)
1513 RC = MRI.getRegClass(LoopReg);
1515 if (InitReg) {
1516 const TargetRegisterClass *ConstrainRegClass =
1517 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1518 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1519 (void)ConstrainRegClass;
1520 }
1522 .addReg(InitReg ? *InitReg : undef(RC))
1523 .addMBB(PreheaderBB)
1526 if (!InitReg)
1527 UndefPhis[LoopReg] = R;
1528 else
1529 Phis[{LoopReg, *InitReg}] = R;
1530 return R;
1531}
1532
1533Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1535 if (R == 0) {
1536
1537
1538
1539 R = MRI.createVirtualRegister(RC);
1540 auto *InsertBB = &PreheaderBB->getParent()->front();
1541 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1542 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1543 }
1544 return R;
1545}
1546
1547namespace {
1548
1549
1550
1551class KernelOperandInfo {
1552 MachineBasicBlock *BB;
1553 MachineRegisterInfo &MRI;
1555 MachineOperand *Source;
1556 MachineOperand *Target;
1557
1558public:
1559 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1560 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1564 while (isRegInLoop(MO)) {
1565 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1566 if (MI->isFullCopy()) {
1567 MO = &MI->getOperand(1);
1568 continue;
1569 }
1570 if (->isPHI())
1571 break;
1572
1573 if (IllegalPhis.count(MI)) {
1574 MO = &MI->getOperand(3);
1575 continue;
1576 }
1577
1579 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1580 : &MI->getOperand(3);
1582 }
1584 }
1585
1587 return PhiDefaults.size() == Other.PhiDefaults.size();
1588 }
1589
1590 void print(raw_ostream &OS) const {
1591 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1592 << *Source->getParent();
1593 }
1594
1595private:
1596 bool isRegInLoop(MachineOperand *MO) {
1598 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1599 }
1600};
1601}
1602
1603MachineBasicBlock *
1608 else
1610 for (auto I = BB->begin(), NI = NewBB->begin(); ->isTerminator();
1611 ++I, ++NI) {
1616 }
1617 return NewBB;
1618}
1619
1621 int MinStage) {
1623 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1626 if (Stage == -1 || Stage >= MinStage)
1627 continue;
1628
1632
1633
1636 MI->getParent());
1638 }
1639 for (auto &Sub : Subs)
1640 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1641 *MRI.getTargetRegisterInfo());
1642 }
1643 if (LIS)
1644 LIS->RemoveMachineInstrFromMaps(*MI);
1645 MI->eraseFromParent();
1646 }
1647}
1648
1655 if (MI.isPHI()) {
1656
1657
1659
1660
1661 Register PhiR = MI.getOperand(0).getReg();
1662 auto RC = MRI.getRegClass(PhiR);
1663 Register NR = MRI.createVirtualRegister(RC);
1665 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1670 Remaps[PhiR] = NR;
1671 }
1672 }
1674 continue;
1675 MI.removeFromParent();
1676 DestBB->insert(InsertPt, &MI);
1679 BlockMIs.erase({SourceBB, KernelMI});
1680 }
1683 assert(MI.getNumOperands() == 3);
1685
1686
1687 if (getStage(Def) == Stage) {
1688 Register PhiReg = MI.getOperand(0).getReg();
1689 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1690 nullptr) != -1);
1691 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1692 MI.getOperand(0).setReg(PhiReg);
1694 }
1695 }
1696 for (auto *P : PhiToDelete)
1697 P->eraseFromParent();
1699
1700
1703 DestBB->insert(InsertPt, NewMI);
1704 Register OrigR = Phi->getOperand(0).getReg();
1705 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1709 Remaps[OrigR] = R;
1713 return R;
1714 };
1717 if (!MO.isReg())
1718 continue;
1719 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())
1720 MO.setReg(It->second);
1721 else {
1722
1723
1725 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1728 }
1729 }
1730 }
1731 }
1732}
1733
1740 for (unsigned I = 0; I < distance; ++I) {
1743 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1745 std::swap(LoopRegIdx, InitRegIdx);
1746 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1747 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1748 }
1749 return CanonicalUseReg;
1750}
1751
1757
1758
1759 LS.reset();
1760 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1761 LS[I] = true;
1765 }
1766
1767
1768
1769
1770
1771
1772
1774 EliminateDeadPhis(ExitingBB, MRI, LIS, true);
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1795
1796
1797 EliminateDeadPhis(B, MRI, LIS, true);
1800 }
1801 for (size_t I = 0; I < Epilogs.size(); I++) {
1802 LS.reset();
1803 for (size_t J = I; J < Epilogs.size(); J++) {
1804 int Iteration = J;
1805 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1806
1807 for (size_t K = Iteration; K > I; K--)
1809 LS[Stage] = true;
1810 }
1813 }
1814
1815
1816
1817
1818 auto PI = Prologs.begin();
1819 auto EI = Epilogs.begin();
1821 for (; PI != Prologs.end(); ++PI, ++EI) {
1823 (*PI)->addSuccessor(*EI);
1825 Register Reg = MI.getOperand(1).getReg();
1827 if (Use && Use->getParent() == Pred) {
1829 if (CanonicalUse->isPHI()) {
1830
1831
1832
1834 }
1836 }
1839 }
1840 }
1841
1842
1847
1848
1850 for (auto I = B->instr_rbegin();
1851 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1854 }
1855 }
1857 if (LIS)
1858 LIS->RemoveMachineInstrFromMaps(*MI);
1859 MI->eraseFromParent();
1860 }
1862
1863
1865 EliminateDeadPhis(B, MRI, LIS);
1866 EliminateDeadPhis(ExitingBB, MRI, LIS);
1867}
1868
1872 if (Exit == BB)
1873 Exit = *std::next(BB->succ_begin());
1874
1876 MF.insert(std::next(BB->getIterator()), NewBB);
1877
1878
1880 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1881 Register OldR = MI.getOperand(3).getReg();
1882 Register R = MRI.createVirtualRegister(RC);
1885 if (Use.getParent() != BB)
1888 Use->substituteRegister(OldR, R, 0,
1889 *MRI.getTargetRegisterInfo());
1895 }
1896 BB->replaceSuccessor(Exit, NewBB);
1897 Exit->replacePhiUsesWith(BB, NewBB);
1899
1902 bool CanAnalyzeBr = ->analyzeBranch(*BB, TBB, FBB, Cond);
1903 (void)CanAnalyzeBr;
1904 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1906 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1908 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1909 return NewBB;
1910}
1911
1916 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, nullptr);
1918}
1919
1921 if (MI->isPHI()) {
1922
1923
1924 Register PhiR = MI->getOperand(0).getReg();
1925 Register R = MI->getOperand(3).getReg();
1926 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1927 if (RMIStage != -1 && [MI->getParent()].test(RMIStage))
1928 R = MI->getOperand(1).getReg();
1929 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1930 MRI.replaceRegWith(PhiR, R);
1931
1932
1933 MI->getOperand(0).setReg(PhiR);
1935 return;
1936 }
1937
1939 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1941
1942 return;
1943
1947
1948
1951 MI->getParent());
1953 }
1954 for (auto &Sub : Subs)
1955 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1956 *MRI.getTargetRegisterInfo());
1957 }
1958 if (LIS)
1959 LIS->RemoveMachineInstrFromMaps(*MI);
1960 MI->eraseFromParent();
1961}
1962
1964
1965 bool KernelDisposed = false;
1966 int TC = Schedule.getNumStages() - 1;
1968 ++PI, ++EI, --TC) {
1974 std::optional StaticallyGreater =
1976 if (!StaticallyGreater) {
1977 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1978
1980 } else if (*StaticallyGreater == false) {
1981 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1982
1983
1984 Prolog->removeSuccessor(Fallthrough);
1986 P.removeOperand(2);
1987 P.removeOperand(1);
1988 }
1990 KernelDisposed = true;
1991 } else {
1992 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1993
1996 P.removeOperand(4);
1997 P.removeOperand(3);
1998 }
1999 }
2000 }
2001
2002 if (!KernelDisposed) {
2005 } else {
2007 }
2008}
2009
2014
2016 BB = Schedule.getLoop()->getTopBlock();
2021
2025}
2026
2028 BB = Schedule.getLoop()->getTopBlock();
2030
2031
2032
2033 std::string ScheduleDump;
2037
2038
2039
2040 assert(LIS && "Requires LiveIntervals!");
2045 if (!ExpandedKernel) {
2046
2048 return;
2049 }
2050
2052
2053
2055 KR.rewrite();
2057
2058
2059
2061 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2062 if (NI->isPHI())
2063 IllegalPhis.insert(&*NI);
2064 }
2065
2066
2067
2069 auto OI = ExpandedKernel->begin();
2070 auto NI = BB->begin();
2071 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2072 while (OI->isPHI() || OI->isFullCopy())
2073 ++OI;
2074 while (NI->isPHI() || NI->isFullCopy())
2075 ++NI;
2076 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2077
2078 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2079 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2080 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2081 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2082 }
2083
2084 bool Failed = false;
2085 for (auto &OldAndNew : KOIs) {
2086 if (OldAndNew.first == OldAndNew.second)
2087 continue;
2089 errs() << "Modulo kernel validation error: [\n";
2090 errs() << " [golden] ";
2091 OldAndNew.first.print(errs());
2092 errs() << " ";
2093 OldAndNew.second.print(errs());
2094 errs() << "]\n";
2095 }
2096
2098 errs() << "Golden reference kernel:\n";
2100 errs() << "New kernel:\n";
2102 errs() << ScheduleDump;
2104 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2105 }
2106
2107
2108
2111}
2112
2114 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2115
2116
2118
2119 return NewMI;
2120}
2121
2122
2123
2124
2128 if (Exit->pred_size() == 1)
2129 return Exit;
2130
2133
2136 MF->insert(Loop->getIterator(), NewExit);
2138
2143 FBB = NewExit;
2144 else if (FBB == Loop)
2145 TBB = NewExit;
2146 else
2148 TII->removeBranch(*Loop);
2150 Loop->replaceSuccessor(Exit, NewExit);
2151 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2153
2154 Exit->replacePhiUsesWith(Loop, NewExit);
2155
2156 return NewExit;
2157}
2158
2159
2160
2161
2162void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2163 int RequiredTC,
2164 InstrMapTy &LastStage0Insts,
2165 MachineBasicBlock &GreaterThan,
2166 MachineBasicBlock &Otherwise) {
2168 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2169 LastStage0Insts);
2170
2172
2173
2177 } else {
2179 }
2180}
2181
2182
2183
2184
2185
2186
2187void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2262 assert(LoopInfo && "Must be able to analyze loop!");
2263
2264 calcNumUnroll();
2265
2266 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2267 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2268 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2269 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2270 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2271
2272 MF.insert(OrigKernel->getIterator(), Check);
2274 MF.insert(OrigKernel->getIterator(), Prolog);
2276 MF.insert(OrigKernel->getIterator(), NewKernel);
2278 MF.insert(OrigKernel->getIterator(), Epilog);
2280 MF.insert(OrigKernel->getIterator(), NewPreheader);
2282
2284
2285 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2287
2288 OrigPreheader->addSuccessor(Check);
2291
2293 Check->addSuccessor(NewPreheader);
2294
2295 Prolog->addSuccessor(NewKernel);
2296
2297 NewKernel->addSuccessor(NewKernel);
2298 NewKernel->addSuccessor(Epilog);
2299
2300 Epilog->addSuccessor(NewPreheader);
2301 Epilog->addSuccessor(NewExit);
2302
2303 InstrMapTy LastStage0Insts;
2304 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2305 LastStage0Insts, *Prolog, *NewPreheader);
2306
2307
2308
2310 generateProlog(PrologVRMap);
2311 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2312 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2313}
2314
2315
2316void ModuloScheduleExpanderMVE::updateInstrUse(
2317 MachineInstr *MI, int StageNum, int PhaseNum,
2318 SmallVectorImpl &CurVRMap,
2319 SmallVectorImpl *PrevVRMap) {
2320
2321
2322
2323
2324
2325
2326 for (MachineOperand &UseMO : MI->uses()) {
2327 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2328 continue;
2329 int DiffStage = 0;
2330 Register OrigReg = UseMO.getReg();
2331 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2332 if (!DefInst || DefInst->getParent() != OrigKernel)
2333 continue;
2336 if (DefInst->isPHI()) {
2337 ++DiffStage;
2339 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2340
2341 DefReg = LoopReg;
2342 DefInst = MRI.getVRegDef(LoopReg);
2343 }
2344 unsigned DefStageNum = Schedule.getStage(DefInst);
2345 DiffStage += StageNum - DefStageNum;
2347 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2348
2349 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2350 else if (!PrevVRMap)
2351
2352
2353 NewReg = InitReg;
2354 else
2355
2356
2357
2358
2359 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2360
2361 const TargetRegisterClass *NRC =
2362 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2363 if (NRC)
2364 UseMO.setReg(NewReg);
2365 else {
2366 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2367 MachineInstr *NewCopy = BuildMI(*OrigKernel, MI, MI->getDebugLoc(),
2368 TII->get(TargetOpcode::COPY), SplitReg)
2371 UseMO.setReg(SplitReg);
2372 }
2373 }
2374}
2375
2376
2377
2382 if (LoopVal == Reg)
2383 return Φ
2384 }
2385 return nullptr;
2386}
2387
2388
2389void ModuloScheduleExpanderMVE::generatePhi(
2390 MachineInstr *OrigMI, int UnrollNum,
2391 SmallVectorImpl &PrologVRMap,
2392 SmallVectorImpl &KernelVRMap,
2393 SmallVectorImpl &PhiVRMap) {
2394 int StageNum = Schedule.getStage(OrigMI);
2395 bool UsePrologReg;
2396 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2397 UsePrologReg = true;
2398 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2399 UsePrologReg = false;
2400 else
2401 return;
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434 for (MachineOperand &DefMO : OrigMI->defs()) {
2435 if (!DefMO.isReg() || DefMO.isDead())
2436 continue;
2437 Register OrigReg = DefMO.getReg();
2438 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2439 if (NewReg == KernelVRMap[UnrollNum].end())
2440 continue;
2442 if (UsePrologReg) {
2443 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2444 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2445 } else {
2447 if (!Phi)
2448 continue;
2449 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2450 }
2451
2453 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2454 MachineInstr *NewPhi =
2455 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2456 TII->get(TargetOpcode::PHI), PhiReg)
2457 .addReg(NewReg->second)
2459 .addReg(CorrespondReg)
2462 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2463 }
2464}
2465
2468 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2469 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2470 Phi.getOperand(Idx).setReg(NewReg);
2471 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2472 return;
2473 }
2474 }
2475}
2476
2477
2478void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2486 if (O.getParent()->getParent() != OrigKernel &&
2487 O.getParent()->getParent() != Prolog &&
2488 O.getParent()->getParent() != NewKernel &&
2489 O.getParent()->getParent() != Epilog)
2491 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2493 }
2494
2495
2496
2497 if (!UsesAfterLoop.empty()) {
2498 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2499 MachineInstr *NewPhi =
2500 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2501 TII->get(TargetOpcode::PHI), PhiReg)
2507
2508 for (MachineOperand *MO : UsesAfterLoop)
2510
2511
2512
2515 }
2516
2517
2518
2519 if (!LoopPhis.empty()) {
2520 for (MachineInstr *Phi : LoopPhis) {
2522 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2523 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2524 MachineInstr *NewPhi =
2525 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(),
2526 Phi->getDebugLoc(), TII->get(TargetOpcode::PHI), NewInit)
2532 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2533 }
2534 }
2535}
2536
2537void ModuloScheduleExpanderMVE::generateProlog(
2538 SmallVectorImpl &PrologVRMap) {
2539 PrologVRMap.clear();
2540 PrologVRMap.resize(Schedule.getNumStages() - 1);
2541 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2542 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2543 ++PrologNum) {
2544 for (MachineInstr *MI : Schedule.getInstructions()) {
2545 if (MI->isPHI())
2546 continue;
2547 int StageNum = Schedule.getStage(MI);
2548 if (StageNum > PrologNum)
2549 continue;
2551 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2552 NewMIMap[NewMI] = {PrologNum, StageNum};
2553 Prolog->push_back(NewMI);
2555 }
2556 }
2557
2558 for (auto I : NewMIMap) {
2559 MachineInstr *MI = I.first;
2560 int PrologNum = I.second.first;
2561 int StageNum = I.second.second;
2562 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2563 }
2564
2566 dbgs() << "prolog:\n";
2568 });
2569}
2570
2571void ModuloScheduleExpanderMVE::generateKernel(
2572 SmallVectorImpl &PrologVRMap,
2573 SmallVectorImpl &KernelVRMap, InstrMapTy &LastStage0Insts) {
2574 KernelVRMap.clear();
2575 KernelVRMap.resize(NumUnroll);
2577 PhiVRMap.resize(NumUnroll);
2578 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2579 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2580 for (MachineInstr *MI : Schedule.getInstructions()) {
2581 if (MI->isPHI())
2582 continue;
2583 int StageNum = Schedule.getStage(MI);
2585 if (UnrollNum == NumUnroll - 1)
2586 LastStage0Insts[MI] = NewMI;
2587 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2588 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2589 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2590 NewMIMap[NewMI] = {UnrollNum, StageNum};
2591 NewKernel->push_back(NewMI);
2593 }
2594 }
2595
2596 for (auto I : NewMIMap) {
2597 MachineInstr *MI = I.first;
2598 int UnrollNum = I.second.first;
2599 int StageNum = I.second.second;
2600 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2601 }
2602
2603
2604 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2606
2608 dbgs() << "kernel:\n";
2609 NewKernel->dump();
2610 });
2611}
2612
2613void ModuloScheduleExpanderMVE::generateEpilog(
2614 SmallVectorImpl &KernelVRMap,
2615 SmallVectorImpl &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2616 EpilogVRMap.clear();
2617 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2618 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2619 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2620 ++EpilogNum) {
2621 for (MachineInstr *MI : Schedule.getInstructions()) {
2622 if (MI->isPHI())
2623 continue;
2624 int StageNum = Schedule.getStage(MI);
2625 if (StageNum <= EpilogNum)
2626 continue;
2628 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2629 NewMIMap[NewMI] = {EpilogNum, StageNum};
2630 Epilog->push_back(NewMI);
2632 }
2633 }
2634
2635 for (auto I : NewMIMap) {
2636 MachineInstr *MI = I.first;
2637 int EpilogNum = I.second.first;
2638 int StageNum = I.second.second;
2639 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2640 }
2641
2642
2643
2644
2645
2646 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2647
2649 dbgs() << "epilog:\n";
2651 });
2652}
2653
2654
2655void ModuloScheduleExpanderMVE::calcNumUnroll() {
2656 DenseMap<MachineInstr *, unsigned> Inst2Idx;
2657 NumUnroll = 1;
2658 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2659 Inst2Idx[Schedule.getInstructions()[I]] = I;
2660
2661 for (MachineInstr *MI : Schedule.getInstructions()) {
2662 if (MI->isPHI())
2663 continue;
2664 int StageNum = Schedule.getStage(MI);
2665 for (const MachineOperand &MO : MI->uses()) {
2667 continue;
2670 continue;
2671
2672 int NumUnrollLocal = 1;
2674 ++NumUnrollLocal;
2675
2676
2678 }
2679 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2680 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2681 --NumUnrollLocal;
2682 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2683 }
2684 }
2685 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2686}
2687
2688
2689
2690
2691void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2692 ValueMapTy &VRMap,
2693 bool LastDef) {
2694 for (MachineOperand &MO : NewMI->all_defs()) {
2696 continue;
2698 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2699 Register NewReg = MRI.createVirtualRegister(RC);
2701 VRMap[Reg] = NewReg;
2702 if (LastDef)
2703 mergeRegUsesAfterPipeline(Reg, NewReg);
2704 }
2705}
2706
2708 OrigKernel = Schedule.getLoop()->getTopBlock();
2709 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2710 OrigExit = Schedule.getLoop()->getExitBlock();
2711
2713
2714 generatePipelinedLoop();
2715}
2716
2717
2719 if (!L.getExitBlock()) {
2720 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2721 return false;
2722 }
2723
2726
2727
2728
2731
2732
2736 if (Ref.getParent() != BB || Ref.isPHI()) {
2737 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2738 "referenced outside of the loop or by phi.\n");
2739 return false;
2740 }
2741
2742
2743
2744
2747 if ((LoopVal).isVirtual() ||
2748 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2750 dbgs() << "Can not apply MVE expander: A phi source value coming "
2751 "from the loop is not defined in the loop.\n");
2752 return false;
2753 }
2754 if (UsedByPhi.count(LoopVal)) {
2755 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2756 "loop is referenced by two or more phis.\n");
2757 return false;
2758 }
2759 UsedByPhi.insert(LoopVal);
2760 }
2761
2762 return true;
2763}
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778namespace {
2780public:
2781 static char ID;
2782
2785 }
2786
2787 bool runOnMachineFunction(MachineFunction &MF) override;
2789
2790 void getAnalysisUsage(AnalysisUsage &AU) const override {
2794 }
2795};
2796}
2797
2798char ModuloScheduleTest::ID = 0;
2799
2801 "Modulo Schedule test pass", false, false)
2806
2807bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2808 MachineLoopInfo &MLI = getAnalysis().getLI();
2809 for (auto *L : MLI) {
2810 if (L->getTopBlock() != L->getBottomBlock())
2811 continue;
2812 runOnLoop(MF, *L);
2813 return false;
2814 }
2815 return false;
2816}
2817
2819 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2820 std::pair<StringRef, StringRef> StageTokenAndValue =
2821 getToken(StageAndCycle.first, "-");
2822 std::pair<StringRef, StringRef> CycleTokenAndValue =
2823 getToken(StageAndCycle.second, "-");
2824 if (StageTokenAndValue.first != "Stage" ||
2825 CycleTokenAndValue.first != "_Cycle") {
2827 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2828 return;
2829 }
2830
2831 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2832 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2833
2834 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2835}
2836
2837void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2838 LiveIntervals &LIS = getAnalysis().getLIS();
2839 MachineBasicBlock *BB = L.getTopBlock();
2840 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2841
2842 DenseMap<MachineInstr *, int> Cycle, Stage;
2843 std::vector<MachineInstr *> Instrs;
2844 for (MachineInstr &MI : *BB) {
2845 if (MI.isTerminator())
2846 continue;
2847 Instrs.push_back(&MI);
2848 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2849 dbgs() << "Parsing post-instr symbol for " << MI;
2851 }
2852 }
2853
2854 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2855 std::move(Stage));
2856 ModuloScheduleExpander MSE(
2858 MSE.expand();
2859 MSE.cleanup();
2860}
2861
2862
2863
2864
2865
2870 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2871 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2872 MI->setPostInstrSymbol(MF, Sym);
2873 }
2874}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
Definition ModuloSchedule.cpp:360
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
Definition ModuloSchedule.cpp:349
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
Definition ModuloSchedule.cpp:2466
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
Definition ModuloSchedule.cpp:2378
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit, LiveIntervals &LIS)
Create a dedicated exit for Loop.
Definition ModuloSchedule.cpp:2125
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
Definition ModuloSchedule.cpp:2818
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
Definition ModuloSchedule.cpp:57
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Implements a dense probed hash-table based set.
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Represents a single loop in the control flow graph.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
LLVM_ABI void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
LLVM_ABI void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
succ_iterator succ_begin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
pred_iterator pred_begin()
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineInstrBundleIterator< MachineInstr > iterator
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
LLVM_ABI void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
defusechain_iterator< true, false, false, true, false > use_iterator
use_iterator/use_begin/use_end - Walk all uses of the specified register.
void expand()
Definition ModuloSchedule.cpp:2707
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
Definition ModuloSchedule.cpp:2718
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
Definition ModuloSchedule.cpp:189
void expand()
Performs the actual expansion.
Definition ModuloSchedule.cpp:72
DenseMap< MachineInstr *, std::pair< Register, int64_t > > InstrChangesTy
void annotate()
Performs the annotation.
Definition ModuloSchedule.cpp:2866
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
Definition ModuloSchedule.cpp:30
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
void expand()
Definition ModuloSchedule.cpp:2015
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
Definition ModuloSchedule.cpp:1604
ModuloSchedule & Schedule
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
Definition ModuloSchedule.cpp:1920
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopInfo
Target loop info before kernel peeling.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
Definition ModuloSchedule.cpp:1913
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
Definition ModuloSchedule.cpp:2010
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
const TargetInstrInfo * TII
void filterInstructions(MachineBasicBlock *MB, int MinStage)
Definition ModuloSchedule.cpp:1620
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
Definition ModuloSchedule.cpp:1752
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
Definition ModuloSchedule.cpp:1963
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
Definition ModuloSchedule.cpp:1869
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Definition ModuloSchedule.cpp:2027
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
Definition ModuloSchedule.cpp:1735
MachineRegisterInfo & MRI
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Definition ModuloSchedule.cpp:1649
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
reverse_iterator rbegin()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual const TargetInstrInfo * getInstrInfo() const
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI void initializeModuloScheduleTestPass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.