LLVM: lib/CodeGen/MachineLICM.cpp Source File (original) (raw)
52#include
53#include
54#include
55
56using namespace llvm;
57
58#define DEBUG_TYPE "machinelicm"
59
62 cl::desc("MachineLICM should avoid speculation"),
64
67 cl::desc("MachineLICM should hoist even cheap instructions"),
69
72 cl::desc("Hoist invariant stores"),
74
76 cl::desc("Hoist invariant loads"),
78
79
80
83 cl::desc("Do not hoist instructions if target"
84 "block is N times hotter than the source."),
86
88
91 cl::desc("Disable hoisting instructions to"
92 " hotter blocks"),
95 "disable the feature"),
97 "enable the feature when using profile data"),
99 "enable the feature with/wo profile data")));
100
102 "Number of machine instructions hoisted out of loops");
104 "Number of instructions hoisted in low reg pressure situation");
106 "Number of high latency instructions hoisted");
108 "Number of hoisted machine instructions CSEed");
110 "Number of machine instructions hoisted out of loops post regalloc");
112 "Number of stores of const phys reg hoisted out of loops");
114 "Number of instructions not hoisted due to block frequency");
115
116namespace {
117 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
118
119 class MachineLICMImpl {
120 const TargetInstrInfo *TII = nullptr;
121 const TargetLoweringBase *TLI = nullptr;
122 const TargetRegisterInfo *TRI = nullptr;
123 const MachineFrameInfo *MFI = nullptr;
124 MachineRegisterInfo *MRI = nullptr;
125 TargetSchedModel SchedModel;
126 bool PreRegAlloc = false;
127 bool HasProfileData = false;
128 Pass *LegacyPass;
130
131
132 AliasAnalysis *AA = nullptr;
133 MachineBlockFrequencyInfo *MBFI = nullptr;
134 MachineLoopInfo *MLI = nullptr;
135 MachineDomTreeUpdater *MDTU = nullptr;
136
137
138 bool Changed = false;
139 bool FirstInLoop = false;
140
141
142
143 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
144
145
146 DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
147
148 bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
149 auto [It, Inserted] = ExitBlockMap.try_emplace(CurLoop);
150 if (Inserted) {
151 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
153 It->second = std::move(ExitBlocks);
154 }
156 }
157
158
159 SmallDenseSet RegSeen;
160 SmallVector<unsigned, 8> RegPressure;
161
162
163
164 SmallVector<unsigned, 8> RegLimit;
165
166
168
169
170 DenseMap<MachineBasicBlock *,
171 DenseMap<unsigned, std::vector<MachineInstr *>>>
172 CSEMap;
173
174 enum {
175 SpeculateFalse = 0,
176 SpeculateTrue = 1,
177 SpeculateUnknown = 2
178 };
179
180
181
182
183 unsigned SpeculationState = SpeculateUnknown;
184
185 public:
186 MachineLICMImpl(bool PreRegAlloc, Pass *LegacyPass,
188 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
189 assert((LegacyPass || MFAM) && "LegacyPass or MFAM must be provided");
190 assert(!(LegacyPass && MFAM) &&
191 "LegacyPass and MFAM cannot be provided at the same time");
192 }
193
194 bool run(MachineFunction &MF);
195
196 void releaseMemory() {
197 RegSeen.clear();
198 RegPressure.clear();
199 RegLimit.clear();
200 BackTrace.clear();
201 CSEMap.clear();
202 ExitBlockMap.clear();
203 }
204
205 private:
206
207 struct CandidateInfo {
208 MachineInstr *MI;
210 int FI;
211
212 CandidateInfo(MachineInstr *mi, Register def, int fi)
213 : MI(mi), Def(def), FI(fi) {}
214 };
215
216 void HoistRegionPostRA(MachineLoop *CurLoop);
217
218 void HoistPostRA(MachineInstr *MI, Register Def, MachineLoop *CurLoop);
219
220 void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers,
221 SmallDenseSet &StoredFIs,
222 SmallVectorImpl &Candidates,
223 MachineLoop *CurLoop);
224
225 void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);
226
227 bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);
228
229 bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);
230
231 bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);
232
233 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,
234 MachineLoop *CurLoop) const;
235
236 bool IsCheapInstruction(MachineInstr &MI) const;
237
238 bool CanCauseHighRegPressure(const SmallDenseMap<unsigned, int> &Cost,
239 bool Cheap);
240
241 void UpdateBackTraceRegPressure(const MachineInstr *MI);
242
243 bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);
244
245 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
246
247 void EnterScope(MachineBasicBlock *MBB);
248
249 void ExitScope(MachineBasicBlock *MBB);
250
251 void ExitScopeIfDone(
253 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
254 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
255
256 void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop);
257
258 void InitRegPressure(MachineBasicBlock *BB);
259
260 SmallDenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
261 bool ConsiderSeen,
262 bool ConsiderUnseenAsDef);
263
264 void UpdateRegPressure(const MachineInstr *MI,
265 bool ConsiderUnseenAsDef = false);
266
267 MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);
268
269 MachineInstr *LookForDuplicate(const MachineInstr *MI,
270 std::vector<MachineInstr *> &PrevMIs);
271
272 bool
273 EliminateCSE(MachineInstr *MI,
274 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
275
276 bool MayCSE(MachineInstr *MI);
277
278 unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
279 MachineLoop *CurLoop);
280
281 void InitCSEMap(MachineBasicBlock *BB);
282
283 void InitializeLoadsHoistableLoops();
284
285 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
286 MachineBasicBlock *TgtBlock);
287 MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);
288 };
289
291 bool PreRegAlloc;
292
293 public:
294 MachineLICMBase(char &ID, bool PreRegAlloc)
295 : MachineFunctionPass(ID), PreRegAlloc(PreRegAlloc) {}
296
297 bool runOnMachineFunction(MachineFunction &MF) override;
298
299 void getAnalysisUsage(AnalysisUsage &AU) const override {
300 AU.addRequired();
302 AU.addRequired();
303 AU.addRequired();
305 AU.addPreserved();
307 }
308 };
309
310 class MachineLICM : public MachineLICMBase {
311 public:
312 static char ID;
313 MachineLICM() : MachineLICMBase(ID, false) {
315 }
316 };
317
318 class EarlyMachineLICM : public MachineLICMBase {
319 public:
320 static char ID;
321 EarlyMachineLICM() : MachineLICMBase(ID, true) {
323 }
324 };
325
326}
327
328char MachineLICM::ID;
329char EarlyMachineLICM::ID;
330
333
335 "Machine Loop Invariant Code Motion", false, false)
341 "Machine Loop Invariant Code Motion", false, false)
342
344 "Early Machine Loop Invariant Code Motion", false, false)
350 "Early Machine Loop Invariant Code Motion", false, false)
351
352bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
353 if (skipFunction(MF.getFunction()))
354 return false;
355
356 MachineLICMImpl Impl(PreRegAlloc, this, nullptr);
357 return Impl.run(MF);
358}
359
360#define GET_RESULT(RESULT, GETTER, INFIX) \
361 ((LegacyPass) \
362 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
363 : &MFAM->getResult<RESULT##Analysis>(MF))
364
366 AA = MFAM != nullptr
368 .getManager()
372 MachineDomTreeUpdater::UpdateStrategy::Lazy);
373 MDTU = &DTU;
377 : nullptr;
378
379 Changed = FirstInLoop = false;
381 TII = ST.getInstrInfo();
382 TLI = ST.getTargetLowering();
383 TRI = ST.getRegisterInfo();
386 SchedModel.init(&ST);
387
389
390 if (PreRegAlloc)
391 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
392 else
393 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
395
396 if (PreRegAlloc) {
397
398 unsigned NumRPS = TRI->getNumRegPressureSets();
399 RegPressure.resize(NumRPS);
401 RegLimit.resize(NumRPS);
402 for (unsigned i = 0, e = NumRPS; i != e; ++i)
403 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
404 }
405
407 InitializeLoadsHoistableLoops();
408
410 while (!Worklist.empty()) {
412
413 if (!PreRegAlloc) {
414 HoistRegionPostRA(CurLoop);
415 } else {
416
417
419 FirstInLoop = true;
420 HoistOutOfLoop(N, CurLoop);
421 CSEMap.clear();
422 }
423 }
424 releaseMemory();
426}
427
428
430
431
432 if (->mayStore())
433 return false;
434
435
436 if (MI->memoperands_empty())
437 return true;
439 if (->isStore() ||
->getPseudoValue())
440 continue;
443 if (Value->getFrameIndex() == FI)
444 return true;
445 }
446 }
447 return false;
448}
449
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483 BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());
484 const unsigned NumRegs = TRI.getNumRegs();
485 const unsigned MaskWords = (NumRegs + 31) / 32;
486 for (unsigned K = 0; K < MaskWords; ++K) {
487 const uint32_t Word = Mask[K];
488 for (unsigned Bit = 0; Bit < 32; ++Bit) {
489 const unsigned PhysReg = (K * 32) + Bit;
490 if (PhysReg == NumRegs)
491 break;
492
493 if (PhysReg && !((Word >> Bit) & 1)) {
494 for (MCRegUnit Unit : TRI.regunits(PhysReg))
495 RUsFromRegsNotInMask.set(static_cast<unsigned>(Unit));
496 }
497 }
498 }
499
500 RUs |= RUsFromRegsNotInMask;
501}
502
503
504
505void MachineLICMImpl::ProcessMI(MachineInstr *MI, BitVector &RUDefs,
506 BitVector &RUClobbers,
507 SmallDenseSet &StoredFIs,
508 SmallVectorImpl &Candidates,
509 MachineLoop *CurLoop) {
510 bool RuledOut = false;
511 bool HasNonInvariantUse = false;
513 for (const MachineOperand &MO : MI->operands()) {
514 if (MO.isFI()) {
515
516 int FI = MO.getIndex();
517 if (!StoredFIs.count(FI) &&
520 StoredFIs.insert(FI);
521 HasNonInvariantUse = true;
522 continue;
523 }
524
525
526
527 if (MO.isRegMask()) {
529 continue;
530 }
531
532 if (!MO.isReg())
533 continue;
535 if ()
536 continue;
538
539 if (!MO.isDef()) {
540 if (!HasNonInvariantUse) {
541 for (MCRegUnit Unit : TRI->regunits(Reg)) {
542
543
544 if (RUDefs.test(static_cast<unsigned>(Unit)) ||
545 RUClobbers.test(static_cast<unsigned>(Unit))) {
546 HasNonInvariantUse = true;
547 break;
548 }
549 }
550 }
551 continue;
552 }
553
554
555 if (!MO.isDead()) {
556 if (Def)
557 RuledOut = true;
558 else
560 }
561
562
563
564
565 for (MCRegUnit Unit : TRI->regunits(Reg)) {
566 if (RUDefs.test(static_cast<unsigned>(Unit))) {
567 RUClobbers.set(static_cast<unsigned>(Unit));
568 RuledOut = true;
569 } else if (RUClobbers.test(static_cast<unsigned>(Unit))) {
570
571
572 RuledOut = true;
573 }
574
575 RUDefs.set(static_cast<unsigned>(Unit));
576 }
577 }
578
579
580
581 if (Def && !RuledOut) {
582 int FI = std::numeric_limits::min();
583 if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
585 Candidates.push_back(CandidateInfo(MI, Def, FI));
586 }
587}
588
589
590
591void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {
592 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
593 if (!Preheader)
594 return;
595
596 unsigned NumRegUnits = TRI->getNumRegUnits();
597 BitVector RUDefs(NumRegUnits);
598 BitVector RUClobbers(NumRegUnits);
599
601 SmallDenseSet StoredFIs;
602
603
604
605 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
606
607
609 if (ML && ML->getHeader()->isEHPad()) continue;
610
611
612
613
614 for (const auto &LI : BB->liveins()) {
615 for (MCRegUnit Unit : TRI->regunits(LI.PhysReg))
616 RUDefs.set(static_cast<unsigned>(Unit));
617 }
618
619
620 if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
622
623
624 if (BB->isEHPad()) {
625 const MachineFunction &MF = *BB->getParent();
629 for (MCRegUnit Unit : TRI->regunits(Reg))
630 RUClobbers.set(static_cast<unsigned>(Unit));
632 for (MCRegUnit Unit : TRI->regunits(Reg))
633 RUClobbers.set(static_cast<unsigned>(Unit));
634 }
635
636 SpeculationState = SpeculateUnknown;
637 for (MachineInstr &MI : *BB)
638 ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
639 }
640
641
642 BitVector TermRUs(NumRegUnits);
644 if (TI != Preheader->end()) {
645 for (const MachineOperand &MO : TI->operands()) {
646 if (!MO.isReg())
647 continue;
649 if ()
650 continue;
651 for (MCRegUnit Unit : TRI->regunits(Reg))
652 TermRUs.set(static_cast<unsigned>(Unit));
653 }
654 }
655
656
657
658
659
660
661
662
663
664 for (CandidateInfo &Candidate : Candidates) {
665 if (Candidate.FI != std::numeric_limits::min() &&
666 StoredFIs.count(Candidate.FI))
667 continue;
668
670 bool Safe = true;
671 for (MCRegUnit Unit : TRI->regunits(Def)) {
672 if (RUClobbers.test(static_cast<unsigned>(Unit)) ||
673 TermRUs.test(static_cast<unsigned>(Unit))) {
674 Safe = false;
675 break;
676 }
677 }
678
679 if (!Safe)
680 continue;
681
682 MachineInstr *MI = Candidate.MI;
683 for (const MachineOperand &MO : MI->all_uses()) {
684 if (!MO.getReg())
685 continue;
686 for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {
687 if (RUDefs.test(static_cast<unsigned>(Unit)) ||
688 RUClobbers.test(static_cast<unsigned>(Unit))) {
689
690
691 Safe = false;
692 break;
693 }
694 }
695
696 if (!Safe)
697 break;
698 }
699
700 if (Safe)
701 HoistPostRA(MI, Candidate.Def, CurLoop);
702 }
703}
704
705
706
707void MachineLICMImpl::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {
708 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
709 if (!BB->isLiveIn(Reg))
710 BB->addLiveIn(Reg);
711 for (MachineInstr &MI : *BB) {
712 for (MachineOperand &MO : MI.all_uses()) {
713 if (!MO.getReg())
714 continue;
715 if (TRI->regsOverlap(Reg, MO.getReg()))
716 MO.setIsKill(false);
717 }
718 }
719 }
720}
721
722
723
724void MachineLICMImpl::HoistPostRA(MachineInstr *MI, Register Def,
725 MachineLoop *CurLoop) {
727
728
729
732 << *MI);
733
734
735 MachineBasicBlock *MBB = MI->getParent();
737
738
739
740
741 assert(->isDebugInstr() && "Should not hoist debug inst");
743
744
745
746
747 AddToLiveIns(Def, CurLoop);
748
749 ++NumPostRAHoisted;
751}
752
753
754
755bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
756 MachineLoop *CurLoop) {
757 if (SpeculationState != SpeculateUnknown)
758 return SpeculationState == SpeculateFalse;
759
760 if (BB != CurLoop->getHeader()) {
761
762 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
764 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
766 SpeculationState = SpeculateTrue;
767 return false;
768 }
769 }
770
771 SpeculationState = SpeculateFalse;
772 return true;
773}
774
775void MachineLICMImpl::EnterScope(MachineBasicBlock *MBB) {
777
778
779 BackTrace.push_back(RegPressure);
780}
781
782void MachineLICMImpl::ExitScope(MachineBasicBlock *MBB) {
785}
786
787
788
789
790void MachineLICMImpl::ExitScopeIfDone(
792 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
793 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
794 if (OpenChildren[Node])
795 return;
796
797 for(;;) {
798 ExitScope(Node->getBlock());
799
801 if (!Parent || --OpenChildren[Parent] != 0)
802 break;
803 Node = Parent;
804 }
805}
806
807
808
809
810
812 MachineLoop *CurLoop) {
813 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
814 if (!Preheader)
815 return;
816
819 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
820 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
821
822
824 while (!WorkList.empty()) {
826 assert(Node && "Null dominator tree node?");
827 MachineBasicBlock *BB = Node->getBlock();
828
829
830
832 if (ML && ML->getHeader()->isEHPad())
833 continue;
834
835
837 continue;
838
839 Scopes.push_back(Node);
840 unsigned NumChildren = Node->getNumChildren();
841
842
843
844
846 NumChildren = 0;
847
848 OpenChildren[Node] = NumChildren;
849 if (NumChildren) {
850
851
852
854 ParentMap[Child] = Node;
856 }
857 }
858 }
859
860 if (Scopes.size() == 0)
861 return;
862
863
865 BackTrace.clear();
866 InitRegPressure(Preheader);
867
868
870 MachineBasicBlock *MBB = Node->getBlock();
871
872 EnterScope(MBB);
873
874
875 SpeculationState = SpeculateUnknown;
877 unsigned HoistRes = HoistResult::NotHoisted;
878 HoistRes = Hoist(&MI, Preheader, CurLoop);
879 if (HoistRes & HoistResult::NotHoisted) {
880
881
883 for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;
884 L = L->getParentLoop())
886
887 while (!InnerLoopWorkList.empty()) {
888 MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
889 MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
890 if (InnerLoopPreheader) {
891 HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
892 if (HoistRes & HoistResult::Hoisted)
893 break;
894 }
895 }
896 }
897
898 if (HoistRes & HoistResult::ErasedMI)
899 continue;
900
901 UpdateRegPressure(&MI);
902 }
903
904
905 ExitScopeIfDone(Node, OpenChildren, ParentMap);
906 }
907}
908
912
913
914
915
916void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
918
919
920
921
922
924 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
928 }
929
930 for (const MachineInstr &MI : *BB)
931 UpdateRegPressure(&MI, true);
932}
933
934
935void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,
936 bool ConsiderUnseenAsDef) {
937 auto Cost = calcRegisterCost(MI, true, ConsiderUnseenAsDef);
938 for (const auto &[Class, Weight] : Cost) {
939 if (static_cast<int>(RegPressure[Class]) < -Weight)
941 else
943 }
944}
945
946
947
948
949
950
951
952SmallDenseMap<unsigned, int>
953MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
954 bool ConsiderUnseenAsDef) {
955 SmallDenseMap<unsigned, int> Cost;
956 if (MI->isImplicitDef())
958 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
959 const MachineOperand &MO = MI->getOperand(i);
961 continue;
964 continue;
965
966
967 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
968 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
969
970 RegClassWeight W = TRI->getRegClassWeight(RC);
971 int RCCost = 0;
973 RCCost = W.RegWeight;
974 else {
976 if (isNew && !isKill && ConsiderUnseenAsDef)
977
978 RCCost = W.RegWeight;
979 else if (!isNew && isKill)
980 RCCost = -W.RegWeight;
981 }
982 if (RCCost == 0)
983 continue;
984 const int *PS = TRI->getRegClassPressureSets(RC);
985 for (; *PS != -1; ++PS)
986 Cost[*PS] += RCCost;
987 }
989}
990
991
992
994 assert(MI.mayLoad() && "Expected MI that loads!");
995
996
997
998 if (MI.memoperands_empty())
999 return true;
1000
1003 if (PSV->isGOT() || PSV->isConstantPool())
1004 return true;
1005
1006 return false;
1007}
1008
1009
1010
1011
1012
1013
1014
1015
1019
1020 bool FoundCallerPresReg = false;
1021 if (.mayStore() || MI.hasUnmodeledSideEffects() ||
1022 (MI.getNumOperands() == 0))
1023 return false;
1024
1025
1027 if (MO.isReg()) {
1029
1030
1031 if (Reg.isVirtual())
1033 if (Reg.isVirtual())
1034 return false;
1035 if (->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
1036 return false;
1037 else
1038 FoundCallerPresReg = true;
1039 } else if (!MO.isImm()) {
1040 return false;
1041 }
1042 }
1043 return FoundCallerPresReg;
1044}
1045
1046
1047
1048
1049
1053
1054
1055
1056 if (.isCopy())
1057 return false;
1058
1060
1061 Register CopySrcReg = MI.getOperand(1).getReg();
1063 return false;
1064
1065 if (->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
1066 return false;
1067
1068 Register CopyDstReg = MI.getOperand(0).getReg();
1069
1070 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
1071
1074 return true;
1075 }
1076 return false;
1077}
1078
1079
1080
1081bool MachineLICMImpl::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
1082
1083 bool DontMoveAcrossStore = || !AllowedToHoistLoads[CurLoop];
1084 if ((.isSafeToMove(DontMoveAcrossStore)) &&
1086 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
1087 return false;
1088 }
1089
1090
1091
1092
1093
1094
1095
1097 !IsGuaranteedToExecute(I.getParent(), CurLoop)) {
1098 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
1099 return false;
1100 }
1101
1102
1103
1104
1105
1106 if (I.isConvergent())
1107 return false;
1108
1110 return false;
1111
1112 return true;
1113}
1114
1115
1116bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &I,
1117 MachineLoop *CurLoop) {
1118 if (!IsLICMCandidate(I, CurLoop)) {
1119 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1120 return false;
1121 }
1123}
1124
1125
1126
1127bool MachineLICMImpl::HasLoopPHIUse(const MachineInstr *MI,
1128 MachineLoop *CurLoop) {
1130 do {
1131 MI = Work.pop_back_val();
1132 for (const MachineOperand &MO : MI->all_defs()) {
1135 continue;
1136 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1137
1138 if (UseMI.isPHI()) {
1139
1140
1142 return true;
1143
1144
1145
1147 return true;
1148 continue;
1149 }
1150
1152 Work.push_back(&UseMI);
1153 }
1154 }
1155 } while (!Work.empty());
1156 return false;
1157}
1158
1159
1160
1161bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1163 MachineLoop *CurLoop) const {
1164 if (MRI->use_nodbg_empty(Reg))
1165 return false;
1166
1167 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1168 if (UseMI.isCopyLike())
1169 continue;
1171 continue;
1172 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1173 const MachineOperand &MO = UseMI.getOperand(i);
1175 continue;
1177 if (MOReg != Reg)
1178 continue;
1179
1181 return true;
1182 }
1183
1184
1185 break;
1186 }
1187
1188 return false;
1189}
1190
1191
1192
1193bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
1195 return true;
1196
1197 bool isCheap = false;
1198 unsigned NumDefs = MI.getDesc().getNumDefs();
1199 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1200 MachineOperand &DefMO = MI.getOperand(i);
1201 if (!DefMO.isReg() || !DefMO.isDef())
1202 continue;
1203 --NumDefs;
1206 continue;
1207
1209 return false;
1210 isCheap = true;
1211 }
1212
1213 return isCheap;
1214}
1215
1216
1217
1218bool MachineLICMImpl::CanCauseHighRegPressure(
1219 const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) {
1220 for (const auto &[Class, Weight] : Cost) {
1221 if (Weight <= 0)
1222 continue;
1223
1224 int Limit = RegLimit[Class];
1225
1226
1227
1229 return true;
1230
1231 for (const auto &RP : BackTrace)
1232 if (static_cast<int>(RP[Class]) + Weight >= Limit)
1233 return true;
1234 }
1235
1236 return false;
1237}
1238
1239
1240
1241
1242void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1243
1244
1245 auto Cost = calcRegisterCost(MI, false,
1246 false);
1247
1248
1249 for (auto &RP : BackTrace)
1250 for (const auto &[Class, Weight] : Cost)
1252}
1253
1254
1255
1256bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &MI,
1257 MachineLoop *CurLoop) {
1258 if (MI.isImplicitDef())
1259 return true;
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1274 return true;
1275
1276 bool CheapInstr = IsCheapInstruction(MI);
1277 bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);
1278
1279
1280 if (CheapInstr && CreatesCopy) {
1281 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1282 return false;
1283 }
1284
1285
1286
1288 return true;
1289
1290
1291
1292 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1293 const MachineOperand &MO = MI.getOperand(i);
1295 continue;
1298 continue;
1299 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {
1301 ++NumHighLatency;
1302 return true;
1303 }
1304 }
1305
1306
1307
1308
1309
1310
1311
1312 auto Cost = calcRegisterCost(&MI, false,
1313 false);
1314
1315
1316
1317 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1319 ++NumLowRP;
1320 return true;
1321 }
1322
1323
1324 if (CreatesCopy) {
1325 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1326 return false;
1327 }
1328
1329
1330
1331
1333 (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {
1335 return false;
1336 }
1337
1338
1339
1340
1341 if (MI.isCopy() || MI.isRegSequence()) {
1342 Register DefReg = MI.getOperand(0).getReg();
1345 [this](const MachineOperand &UseOp) {
1346 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1347 MRI->isConstantPhysReg(UseOp.getReg());
1348 }) &&
1349 IsLoopInvariantInst(MI, CurLoop) &&
1350 any_of(MRI->use_nodbg_instructions(DefReg),
1351 [&CurLoop, this, DefReg,
1353 if (!CurLoop->contains(&UseMI))
1354 return false;
1355
1356
1357
1358
1359
1360 if (CanCauseHighRegPressure(Cost, false) &&
1361 !CurLoop->isLoopInvariant(UseMI, DefReg))
1362 return false;
1363
1364 return true;
1365 }))
1366 return true;
1367 }
1368
1369
1370
1372 .isDereferenceableInvariantLoad()) {
1373 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1374 return false;
1375 }
1376
1377 return true;
1378}
1379
1380
1381
1382
1383MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI,
1384 MachineLoop *CurLoop) {
1385
1386 if (MI->canFoldAsLoad())
1387 return nullptr;
1388
1389
1390
1391
1392 if (->isDereferenceableInvariantLoad())
1393 return nullptr;
1394
1395
1396 unsigned LoadRegIndex;
1397 unsigned NewOpc =
1399 true,
1400 false,
1401 &LoadRegIndex);
1402 if (NewOpc == 0) return nullptr;
1403 const MCInstrDesc &MID = TII->get(NewOpc);
1404 MachineFunction &MF = *MI->getMF();
1405 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex);
1406
1408
1409 SmallVector<MachineInstr *, 2> NewMIs;
1411 true,
1412 false, NewMIs);
1415 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1416 "succeeded!");
1418 "Unfolded a load into multiple instructions!");
1419 MachineBasicBlock *MBB = MI->getParent();
1423
1424
1425 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1426 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1427 NewMIs[0]->eraseFromParent();
1428 NewMIs[1]->eraseFromParent();
1429 return nullptr;
1430 }
1431
1432
1433 UpdateRegPressure(NewMIs[1]);
1434
1435
1436
1437
1438 if (MI->shouldUpdateAdditionalCallInfo())
1440
1441 MI->eraseFromParent();
1442 return NewMIs[0];
1443}
1444
1445
1446
1447
1448void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1449 for (MachineInstr &MI : *BB)
1451}
1452
1453
1454
1455void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1458
1459
1460
1461 while (!Worklist.empty()) {
1462 auto *L = Worklist.pop_back_val();
1463 AllowedToHoistLoads[L] = true;
1466 }
1467
1468
1469
1470
1471
1472
1473
1474
1475 for (auto *Loop : reverse(LoopsInPreOrder)) {
1476 for (auto *MBB : Loop->blocks()) {
1477
1478 if (!AllowedToHoistLoads[Loop])
1479 continue;
1481 if (.isLoadFoldBarrier() &&
.mayStore() &&
.isCall() &&
1482 !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
1483 continue;
1484 for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
1485 AllowedToHoistLoads[L] = false;
1486 break;
1487 }
1488 }
1489 }
1490}
1491
1492
1493
1494MachineInstr *
1495MachineLICMImpl::LookForDuplicate(const MachineInstr *MI,
1496 std::vector<MachineInstr *> &PrevMIs) {
1497 for (MachineInstr *PrevMI : PrevMIs)
1499 return PrevMI;
1500
1501 return nullptr;
1502}
1503
1504
1505
1506
1507
1508bool MachineLICMImpl::EliminateCSE(
1509 MachineInstr *MI,
1510 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1511
1512
1513 if (MI->isImplicitDef())
1514 return false;
1515
1516
1517
1518 if (MI->mayLoad() && ->isDereferenceableInvariantLoad())
1519 return false;
1520
1521 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1523
1524
1525
1526 SmallVector<unsigned, 2> Defs;
1527 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1528 const MachineOperand &MO = MI->getOperand(i);
1529
1530
1532 MO.getReg() == Dup->getOperand(i).getReg()) &&
1533 "Instructions with different phys regs are not identical!");
1534
1537 }
1538
1540 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1541 unsigned Idx = Defs[i];
1543 Register DupReg = Dup->getOperand(Idx).getReg();
1544 OrigRCs.push_back(MRI->getRegClass(DupReg));
1545
1546 if (->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1547
1548 for (unsigned j = 0; j != i; ++j)
1549 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1550 return false;
1551 }
1552 }
1553
1554 for (unsigned Idx : Defs) {
1556 Register DupReg = Dup->getOperand(Idx).getReg();
1557 MRI->replaceRegWith(Reg, DupReg);
1558 MRI->clearKillFlags(DupReg);
1559
1560 if (->use_nodbg_empty(DupReg))
1561 Dup->getOperand(Idx).setIsDead(false);
1562 }
1563
1564 MI->eraseFromParent();
1565 ++NumCSEed;
1566 return true;
1567 }
1568 return false;
1569}
1570
1571
1572
1573bool MachineLICMImpl::MayCSE(MachineInstr *MI) {
1574 if (MI->mayLoad() && ->isDereferenceableInvariantLoad())
1575 return false;
1576
1577 unsigned Opcode = MI->getOpcode();
1578 for (auto &Map : CSEMap) {
1579
1581 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1582 Map.second.find(Opcode);
1583
1584
1585 if (CI == Map.second.end() || MI->isImplicitDef())
1586 continue;
1587 if (LookForDuplicate(MI, CI->second) != nullptr)
1588 return true;
1589 }
1590 }
1591
1592 return false;
1593}
1594
1595
1596
1597
1598unsigned MachineLICMImpl::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
1599 MachineLoop *CurLoop) {
1600 MachineBasicBlock *SrcBlock = MI->getParent();
1601
1602
1605 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1606 ++NumNotHoistedDueToHotness;
1607 return HoistResult::NotHoisted;
1608 }
1609
1610 bool HasExtractHoistableLoad = false;
1611 if (!IsLoopInvariantInst(*MI, CurLoop) ||
1612 !IsProfitableToHoist(*MI, CurLoop)) {
1613
1614 MI = ExtractHoistableLoad(MI, CurLoop);
1615 if ()
1616 return HoistResult::NotHoisted;
1617 HasExtractHoistableLoad = true;
1618 }
1619
1620
1621
1622 if (MI->mayStore())
1623 NumStoreConst++;
1624
1625
1626
1628 dbgs() << "Hoisting " << *MI;
1629 if (MI->getParent()->getBasicBlock())
1633 dbgs() << "\n";
1634 });
1635
1636
1637
1638 if (FirstInLoop) {
1639 InitCSEMap(Preheader);
1640 FirstInLoop = false;
1641 }
1642
1643
1644 unsigned Opcode = MI->getOpcode();
1645 bool HasCSEDone = false;
1646 for (auto &Map : CSEMap) {
1647
1649 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1650 Map.second.find(Opcode);
1651 if (CI != Map.second.end()) {
1652 if (EliminateCSE(MI, CI)) {
1653 HasCSEDone = true;
1654 break;
1655 }
1656 }
1657 }
1658 }
1659
1660 if (!HasCSEDone) {
1661
1663
1664
1665
1666
1667 assert(->isDebugInstr() && "Should not hoist debug inst");
1669
1670
1671 UpdateBackTraceRegPressure(MI);
1672
1673
1674
1675
1676 for (MachineOperand &MO : MI->all_defs())
1678 MRI->clearKillFlags(MO.getReg());
1679
1680 CSEMap[Preheader][Opcode].push_back(MI);
1681 }
1682
1683 ++NumHoisted;
1685
1686 if (HasCSEDone || HasExtractHoistableLoad)
1687 return HoistResult::Hoisted | HoistResult::ErasedMI;
1688 return HoistResult::Hoisted;
1689}
1690
1691
1692MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {
1693
1694
1695 if (MachineBasicBlock *Preheader = CurLoop->getLoopPreheader())
1696 return Preheader;
1697
1698
1699
1701 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(
1702 CurLoop->getHeader(), LegacyPass, MFAM, nullptr, MDTU);
1703 if (NewPreheader)
1705 return NewPreheader;
1706 }
1707
1708 return nullptr;
1709}
1710
1711
1712
1713bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1714 MachineBasicBlock *TgtBlock) {
1715
1718
1719
1720 if (!SrcBF)
1721 return true;
1722
1723 double Ratio = (double)DstBF / SrcBF;
1724
1725
1727}
1728
1729template <typename DerivedT, bool PreRegAlloc>
1732 bool Changed = MachineLICMImpl(PreRegAlloc, nullptr, &MFAM).run(MF);
1737 return PA;
1738}