LLVM: lib/CodeGen/MachineLICM.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
53#include
54#include
55#include
56#include
57
58using namespace llvm;
59
60#define DEBUG_TYPE "machinelicm"
61
64 cl::desc("MachineLICM should avoid speculation"),
66
69 cl::desc("MachineLICM should hoist even cheap instructions"),
71
74 cl::desc("Hoist invariant stores"),
76
78 cl::desc("Hoist invariant loads"),
80
81
82
85 cl::desc("Do not hoist instructions if target"
86 "block is N times hotter than the source."),
88
90
93 cl::desc("Disable hoisting instructions to"
94 " hotter blocks"),
97 "disable the feature"),
99 "enable the feature when using profile data"),
101 "enable the feature with/wo profile data")));
102
104 "Number of machine instructions hoisted out of loops");
106 "Number of instructions hoisted in low reg pressure situation");
108 "Number of high latency instructions hoisted");
110 "Number of hoisted machine instructions CSEed");
112 "Number of machine instructions hoisted out of loops post regalloc");
114 "Number of stores of const phys reg hoisted out of loops");
116 "Number of instructions not hoisted due to block frequency");
117
118namespace {
119 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
120
121 class MachineLICMImpl {
128 bool PreRegAlloc = false;
129 bool HasProfileData = false;
130 Pass *LegacyPass;
132
133
134 AliasAnalysis *AA = nullptr;
136 MachineLoopInfo *MLI = nullptr;
138
139
140 bool Changed = false;
141 bool FirstInLoop = false;
142
143
144
146
147
149
152 if (Inserted) {
155 It->second = std::move(ExitBlocks);
156 }
158 }
159
160
163
164
165
167
168
170
171
174 CSEMap;
175
176 enum {
177 SpeculateFalse = 0,
178 SpeculateTrue = 1,
179 SpeculateUnknown = 2
180 };
181
182
183
184
185 unsigned SpeculationState = SpeculateUnknown;
186
187 public:
188 MachineLICMImpl(bool PreRegAlloc, Pass *LegacyPass,
190 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
191 assert((LegacyPass || MFAM) && "LegacyPass or MFAM must be provided");
192 assert(!(LegacyPass && MFAM) &&
193 "LegacyPass and MFAM cannot be provided at the same time");
194 }
195
197
198 void releaseMemory() {
201 RegLimit.clear();
202 BackTrace.clear();
203 CSEMap.clear();
204 ExitBlockMap.clear();
205 }
206
207 private:
208
209 struct CandidateInfo {
211 unsigned Def;
212 int FI;
213
214 CandidateInfo(MachineInstr *mi, unsigned def, int fi)
215 : MI(mi), Def(def), FI(fi) {}
216 };
217
218 void HoistRegionPostRA(MachineLoop *CurLoop,
220
223
228
230
232
234
236
239
241
243 bool Cheap);
244
245 void UpdateBackTraceRegPressure(const MachineInstr *MI);
246
248
250
251 bool isTriviallyReMaterializable(const MachineInstr &MI) const;
252
254
256
257 void ExitScopeIfDone(
261
264
266
268 bool ConsiderSeen,
269 bool ConsiderUnseenAsDef);
270
272 bool ConsiderUnseenAsDef = false);
273
275
277 std::vector<MachineInstr *> &PrevMIs);
278
279 bool
281 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
282
284
287
289
290 void InitializeLoadsHoistableLoops();
291
296 };
297
299 bool PreRegAlloc;
300
301 public:
302 MachineLICMBase(char &ID, bool PreRegAlloc)
304
306
315 }
316 };
317
318 class MachineLICM : public MachineLICMBase {
319 public:
320 static char ID;
321 MachineLICM() : MachineLICMBase(ID, false) {
323 }
324 };
325
326 class EarlyMachineLICM : public MachineLICMBase {
327 public:
328 static char ID;
329 EarlyMachineLICM() : MachineLICMBase(ID, true) {
331 }
332 };
333
334}
335
336char MachineLICM::ID;
337char EarlyMachineLICM::ID;
338
341
343 "Machine Loop Invariant Code Motion", false, false)
350
359
360bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
361 if (skipFunction(MF.getFunction()))
362 return false;
363
364 MachineLICMImpl Impl(PreRegAlloc, this, nullptr);
365 return Impl.run(MF);
366}
367
368#define GET_RESULT(RESULT, GETTER, INFIX) \
369 ((LegacyPass) \
370 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
371 : &MFAM->getResult<RESULT##Analysis>(MF))
372
374 AA = MFAM != nullptr
376 .getManager()
380 MachineDomTreeUpdater::UpdateStrategy::Lazy);
381 MDTU = &DTU;
385 : nullptr;
386
387 Changed = FirstInLoop = false;
389 TII = ST.getInstrInfo();
390 TLI = ST.getTargetLowering();
391 TRI = ST.getRegisterInfo();
394 SchedModel.init(&ST);
395
397
398 if (PreRegAlloc)
399 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
400 else
401 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
403
404 if (PreRegAlloc) {
405
406 unsigned NumRPS = TRI->getNumRegPressureSets();
407 RegPressure.resize(NumRPS);
408 std::fill(RegPressure.begin(), RegPressure.end(), 0);
409 RegLimit.resize(NumRPS);
410 for (unsigned i = 0, e = NumRPS; i != e; ++i)
411 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
412 }
413
415 InitializeLoadsHoistableLoops();
416
418 while (!Worklist.empty()) {
421
422 if (!PreRegAlloc)
423 HoistRegionPostRA(CurLoop, CurPreheader);
424 else {
425
426
428 FirstInLoop = true;
429 HoistOutOfLoop(N, CurLoop, CurPreheader);
430 CSEMap.clear();
431 }
432 }
433 releaseMemory();
434 return Changed;
435}
436
437
439
440
441 if (->mayStore())
442 return false;
443
444
445 if (MI->memoperands_empty())
446 return true;
448 if (->isStore() ||
->getPseudoValue())
449 continue;
451 dyn_cast(MemOp->getPseudoValue())) {
452 if (Value->getFrameIndex() == FI)
453 return true;
454 }
455 }
456 return false;
457}
458
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492 BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());
493 const unsigned NumRegs = TRI.getNumRegs();
494 const unsigned MaskWords = (NumRegs + 31) / 32;
495 for (unsigned K = 0; K < MaskWords; ++K) {
496 const uint32_t Word = Mask[K];
497 for (unsigned Bit = 0; Bit < 32; ++Bit) {
498 const unsigned PhysReg = (K * 32) + Bit;
499 if (PhysReg == NumRegs)
500 break;
501
502 if (PhysReg && !((Word >> Bit) & 1)) {
504 RUsFromRegsNotInMask.set(*RUI);
505 }
506 }
507 }
508
509 RUs |= RUsFromRegsNotInMask;
510}
511
512
513
519 bool RuledOut = false;
520 bool HasNonInvariantUse = false;
521 unsigned Def = 0;
523 if (MO.isFI()) {
524
525 int FI = MO.getIndex();
526 if (!StoredFIs.count(FI) &&
529 StoredFIs.insert(FI);
530 HasNonInvariantUse = true;
531 continue;
532 }
533
534
535
536 if (MO.isRegMask()) {
538 continue;
539 }
540
541 if (!MO.isReg())
542 continue;
544 if (!Reg)
545 continue;
546 assert(Reg.isPhysical() && "Not expecting virtual register!");
547
548 if (!MO.isDef()) {
549 if (!HasNonInvariantUse) {
551
552
553 if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
554 HasNonInvariantUse = true;
555 break;
556 }
557 }
558 }
559 continue;
560 }
561
562 if (MO.isImplicit()) {
564 RUClobbers.set(*RUI);
565 if (!MO.isDead())
566
567 RuledOut = true;
568
569
570 continue;
571 }
572
573
574
575 if (Def)
576 RuledOut = true;
577 else
579
580
581
582
584 if (RUDefs.test(*RUI)) {
585 RUClobbers.set(*RUI);
586 RuledOut = true;
587 } else if (RUClobbers.test(*RUI)) {
588
589
590 RuledOut = true;
591 }
592
593 RUDefs.set(*RUI);
594 }
595 }
596
597
598
599 if (Def && !RuledOut) {
600 int FI = std::numeric_limits::min();
601 if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
603 Candidates.push_back(CandidateInfo(MI, Def, FI));
604 }
605}
606
607
608
609void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop,
611 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
612 if (!Preheader)
613 return;
614
615 unsigned NumRegUnits = TRI->getNumRegUnits();
616 BitVector RUDefs(NumRegUnits);
617 BitVector RUClobbers(NumRegUnits);
618
621
622
623
625
626
628 if (ML && ML->getHeader()->isEHPad()) continue;
629
630
631
632
633 for (const auto &LI : BB->liveins()) {
635 RUDefs.set(*RUI);
636 }
637
638
639 if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
641
642 SpeculationState = SpeculateUnknown;
644 ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
645 }
646
647
650 if (TI != Preheader->end()) {
652 if (!MO.isReg())
653 continue;
655 if (!Reg)
656 continue;
658 TermRUs.set(*RUI);
659 }
660 }
661
662
663
664
665
666
667
668
669
670 for (CandidateInfo &Candidate : Candidates) {
671 if (Candidate.FI != std::numeric_limits::min() &&
672 StoredFIs.count(Candidate.FI))
673 continue;
674
675 unsigned Def = Candidate.Def;
676 bool Safe = true;
678 if (RUClobbers.test(*RUI) || TermRUs.test(*RUI)) {
679 Safe = false;
680 break;
681 }
682 }
683
684 if (!Safe)
685 continue;
686
689 if (!MO.getReg())
690 continue;
692 if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
693
694
695 Safe = false;
696 break;
697 }
698 }
699
700 if (!Safe)
701 break;
702 }
703
704 if (Safe)
705 HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
706 }
707}
708
709
710
713 if (!BB->isLiveIn(Reg))
714 BB->addLiveIn(Reg);
717 if (!MO.getReg())
718 continue;
719 if (TRI->regsOverlap(Reg, MO.getReg()))
720 MO.setIsKill(false);
721 }
722 }
723 }
724}
725
726
727
728void MachineLICMImpl::HoistPostRA(MachineInstr *MI, unsigned Def,
731 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
732
733
734
737 << *MI);
738
739
742
743
744
745
746 assert(->isDebugInstr() && "Should not hoist debug inst");
748
749
750
751
752 AddToLiveIns(Def, CurLoop);
753
754 ++NumPostRAHoisted;
755 Changed = true;
756}
757
758
759
760bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
762 if (SpeculationState != SpeculateUnknown)
763 return SpeculationState == SpeculateFalse;
764
765 if (BB != CurLoop->getHeader()) {
766
769 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
771 SpeculationState = SpeculateTrue;
772 return false;
773 }
774 }
775
776 SpeculationState = SpeculateFalse;
777 return true;
778}
779
780
781
782
783
784bool MachineLICMImpl::isTriviallyReMaterializable(
786 if (->isTriviallyReMaterializable(MI))
787 return false;
788
790 if (MO.getReg().isVirtual())
791 return false;
792 }
793
794 return true;
795}
796
799
800
801 BackTrace.push_back(RegPressure);
802}
803
807}
808
809
810
811
812void MachineLICMImpl::ExitScopeIfDone(
816 if (OpenChildren[Node])
817 return;
818
819 for(;;) {
820 ExitScope(Node->getBlock());
821
823 if (!Parent || --OpenChildren[Parent] != 0)
824 break;
825 Node = Parent;
826 }
827}
828
829
830
831
832
836 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
837 if (!Preheader)
838 return;
839
844
845
847 while (!WorkList.empty()) {
849 assert(Node && "Null dominator tree node?");
851
852
853
855 if (ML && ML->getHeader()->isEHPad())
856 continue;
857
858
860 continue;
861
863 unsigned NumChildren = Node->getNumChildren();
864
865
866
867
869 NumChildren = 0;
870
871 OpenChildren[Node] = NumChildren;
872 if (NumChildren) {
873
874
875
877 ParentMap[Child] = Node;
879 }
880 }
881 }
882
883 if (Scopes.size() == 0)
884 return;
885
886
888 BackTrace.clear();
889 InitRegPressure(Preheader);
890
891
894
895 EnterScope(MBB);
896
897
898 SpeculationState = SpeculateUnknown;
900 unsigned HoistRes = HoistResult::NotHoisted;
901 HoistRes = Hoist(&MI, Preheader, CurLoop);
902 if (HoistRes & HoistResult::NotHoisted) {
903
904
907 L = L->getParentLoop())
909
910 while (!InnerLoopWorkList.empty()) {
913 if (InnerLoopPreheader) {
914 HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
915 if (HoistRes & HoistResult::Hoisted)
916 break;
917 }
918 }
919 }
920
921 if (HoistRes & HoistResult::ErasedMI)
922 continue;
923
924 UpdateRegPressure(&MI);
925 }
926
927
928 ExitScopeIfDone(Node, OpenChildren, ParentMap);
929 }
930}
931
934}
935
936
937
938
941
942
943
944
945
951 }
952
954 UpdateRegPressure(&MI, true);
955}
956
957
958void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,
959 bool ConsiderUnseenAsDef) {
960 auto Cost = calcRegisterCost(MI, true, ConsiderUnseenAsDef);
961 for (const auto &RPIdAndCost : Cost) {
962 unsigned Class = RPIdAndCost.first;
963 if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
965 else
967 }
968}
969
970
971
972
973
974
975
977MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
978 bool ConsiderUnseenAsDef) {
980 if (MI->isImplicitDef())
982 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
985 continue;
987 if (.isVirtual())
988 continue;
989
990
991 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
993
995 int RCCost = 0;
997 RCCost = W.RegWeight;
998 else {
1000 if (isNew && !isKill && ConsiderUnseenAsDef)
1001
1002 RCCost = W.RegWeight;
1003 else if (!isNew && isKill)
1004 RCCost = -W.RegWeight;
1005 }
1006 if (RCCost == 0)
1007 continue;
1008 const int *PS = TRI->getRegClassPressureSets(RC);
1009 for (; *PS != -1; ++PS)
1010 Cost[*PS] += RCCost;
1011 }
1012 return Cost;
1013}
1014
1015
1016
1018 assert(MI.mayLoad() && "Expected MI that loads!");
1019
1020
1021
1022 if (MI.memoperands_empty())
1023 return true;
1024
1027 if (PSV->isGOT() || PSV->isConstantPool())
1028 return true;
1029
1030 return false;
1031}
1032
1033
1034
1035
1036
1037
1038
1039
1043
1044 bool FoundCallerPresReg = false;
1045 if (.mayStore() || MI.hasUnmodeledSideEffects() ||
1046 (MI.getNumOperands() == 0))
1047 return false;
1048
1049
1051 if (MO.isReg()) {
1053
1054
1055 if (Reg.isVirtual())
1056 Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
1057 if (Reg.isVirtual())
1058 return false;
1059 if (->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
1060 return false;
1061 else
1062 FoundCallerPresReg = true;
1063 } else if (!MO.isImm()) {
1064 return false;
1065 }
1066 }
1067 return FoundCallerPresReg;
1068}
1069
1070
1071
1072
1073
1077
1078
1079
1080 if (.isCopy())
1081 return false;
1082
1084
1085 Register CopySrcReg = MI.getOperand(1).getReg();
1087 return false;
1088
1089 if (->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
1090 return false;
1091
1092 Register CopyDstReg = MI.getOperand(0).getReg();
1093
1094 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
1095
1098 return true;
1099 }
1100 return false;
1101}
1102
1103
1104
1106
1107 bool DontMoveAcrossStore = || !AllowedToHoistLoads[CurLoop];
1108 if ((.isSafeToMove(DontMoveAcrossStore)) &&
1110 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
1111 return false;
1112 }
1113
1114
1115
1116
1117
1118
1119
1121 !IsGuaranteedToExecute(I.getParent(), CurLoop)) {
1122 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
1123 return false;
1124 }
1125
1126
1127
1128
1129
1130 if (I.isConvergent())
1131 return false;
1132
1133 if (->shouldHoist(I, CurLoop))
1134 return false;
1135
1136 return true;
1137}
1138
1139
1140bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &I,
1142 if (!IsLICMCandidate(I, CurLoop)) {
1143 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1144 return false;
1145 }
1147}
1148
1149
1150
1151bool MachineLICMImpl::HasLoopPHIUse(const MachineInstr *MI,
1154 do {
1155 MI = Work.pop_back_val();
1158 if (.isVirtual())
1159 continue;
1161
1162 if (UseMI.isPHI()) {
1163
1164
1166 return true;
1167
1168
1169
1171 return true;
1172 continue;
1173 }
1174
1176 Work.push_back(&UseMI);
1177 }
1178 }
1179 } while (!Work.empty());
1180 return false;
1181}
1182
1183
1184
1185bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1188 if (MRI->use_nodbg_empty(Reg))
1189 return false;
1190
1192 if (UseMI.isCopyLike())
1193 continue;
1195 continue;
1196 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1199 continue;
1201 if (MOReg != Reg)
1202 continue;
1203
1204 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1205 return true;
1206 }
1207
1208
1209 break;
1210 }
1211
1212 return false;
1213}
1214
1215
1216
1217bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
1219 return true;
1220
1221 bool isCheap = false;
1222 unsigned NumDefs = MI.getDesc().getNumDefs();
1223 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1225 if (!DefMO.isReg() || !DefMO.isDef())
1226 continue;
1227 --NumDefs;
1229 if (Reg.isPhysical())
1230 continue;
1231
1232 if (->hasLowDefLatency(SchedModel, MI, i))
1233 return false;
1234 isCheap = true;
1235 }
1236
1237 return isCheap;
1238}
1239
1240
1241
1242bool MachineLICMImpl::CanCauseHighRegPressure(
1244 for (const auto &RPIdAndCost : Cost) {
1245 if (RPIdAndCost.second <= 0)
1246 continue;
1247
1248 unsigned Class = RPIdAndCost.first;
1249 int Limit = RegLimit[Class];
1250
1251
1252
1254 return true;
1255
1256 for (const auto &RP : BackTrace)
1257 if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1258 return true;
1259 }
1260
1261 return false;
1262}
1263
1264
1265
1266
1267void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1268
1269
1270 auto Cost = calcRegisterCost(MI, false,
1271 false);
1272
1273
1274 for (auto &RP : BackTrace)
1275 for (const auto &RPIdAndCost : Cost)
1276 RP[RPIdAndCost.first] += RPIdAndCost.second;
1277}
1278
1279
1280
1281bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &MI,
1283 if (MI.isImplicitDef())
1284 return true;
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1299 return true;
1300
1301 bool CheapInstr = IsCheapInstruction(MI);
1302 bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);
1303
1304
1305 if (CheapInstr && CreatesCopy) {
1306 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1307 return false;
1308 }
1309
1310
1311
1312 if (isTriviallyReMaterializable(MI))
1313 return true;
1314
1315
1316
1317 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1320 continue;
1322 if (.isVirtual())
1323 continue;
1324 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {
1326 ++NumHighLatency;
1327 return true;
1328 }
1329 }
1330
1331
1332
1333
1334
1335
1336
1337 auto Cost = calcRegisterCost(&MI, false,
1338 false);
1339
1340
1341
1342 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1344 ++NumLowRP;
1345 return true;
1346 }
1347
1348
1349 if (CreatesCopy) {
1350 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1351 return false;
1352 }
1353
1354
1355
1356
1358 (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {
1360 return false;
1361 }
1362
1363
1364
1365
1366 if (MI.isCopy() || MI.isRegSequence()) {
1367 Register DefReg = MI.getOperand(0).getReg();
1371 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1372 MRI->isConstantPhysReg(UseOp.getReg());
1373 }) &&
1374 IsLoopInvariantInst(MI, CurLoop) &&
1375 any_of(MRI->use_nodbg_instructions(DefReg),
1376 [&CurLoop, this, DefReg,
1378 if (!CurLoop->contains(&UseMI))
1379 return false;
1380
1381
1382
1383
1384
1385 if (CanCauseHighRegPressure(Cost, false) &&
1386 !CurLoop->isLoopInvariant(UseMI, DefReg))
1387 return false;
1388
1389 return true;
1390 }))
1391 return true;
1392 }
1393
1394
1395
1396 if (!isTriviallyReMaterializable(MI) &&
1397 .isDereferenceableInvariantLoad()) {
1398 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1399 return false;
1400 }
1401
1402 return true;
1403}
1404
1405
1406
1407
1410
1411 if (MI->canFoldAsLoad())
1412 return nullptr;
1413
1414
1415
1416
1417 if (->isDereferenceableInvariantLoad())
1418 return nullptr;
1419
1420
1421 unsigned LoadRegIndex;
1422 unsigned NewOpc =
1423 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1424 true,
1425 false,
1426 &LoadRegIndex);
1427 if (NewOpc == 0) return nullptr;
1431
1433
1435 bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1436 true,
1437 false, NewMIs);
1440 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1441 "succeeded!");
1443 "Unfolded a load into multiple instructions!");
1448
1449
1450 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1451 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1452 NewMIs[0]->eraseFromParent();
1453 NewMIs[1]->eraseFromParent();
1454 return nullptr;
1455 }
1456
1457
1458 UpdateRegPressure(NewMIs[1]);
1459
1460
1461
1462
1463 if (MI->shouldUpdateAdditionalCallInfo())
1465
1466 MI->eraseFromParent();
1467 return NewMIs[0];
1468}
1469
1470
1471
1472
1476}
1477
1478
1479
1480void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1483
1484
1485
1486 while (!Worklist.empty()) {
1487 auto *L = Worklist.pop_back_val();
1488 AllowedToHoistLoads[L] = true;
1490 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
1491 L->getSubLoops().end());
1492 }
1493
1494
1495
1496
1497
1498
1499
1500
1501 for (auto *Loop : reverse(LoopsInPreOrder)) {
1503
1504 if (!AllowedToHoistLoads[Loop])
1505 continue;
1507 if (.isLoadFoldBarrier() &&
.mayStore() &&
.isCall() &&
1508 !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
1509 continue;
1511 AllowedToHoistLoads[L] = false;
1512 break;
1513 }
1514 }
1515 }
1516}
1517
1518
1519
1521MachineLICMImpl::LookForDuplicate(const MachineInstr *MI,
1522 std::vector<MachineInstr *> &PrevMIs) {
1524 if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1525 return PrevMI;
1526
1527 return nullptr;
1528}
1529
1530
1531
1532
1533
1534bool MachineLICMImpl::EliminateCSE(
1536 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1537
1538
1539 if (MI->isImplicitDef())
1540 return false;
1541
1542
1543
1544 if (MI->mayLoad() && ->isDereferenceableInvariantLoad())
1545 return false;
1546
1547 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1549
1550
1551
1553 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1555
1556
1558 MO.getReg() == Dup->getOperand(i).getReg()) &&
1559 "Instructions with different phys regs are not identical!");
1560
1563 }
1564
1566 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1567 unsigned Idx = Defs[i];
1569 Register DupReg = Dup->getOperand(Idx).getReg();
1570 OrigRCs.push_back(MRI->getRegClass(DupReg));
1571
1572 if (->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1573
1574 for (unsigned j = 0; j != i; ++j)
1575 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1576 return false;
1577 }
1578 }
1579
1580 for (unsigned Idx : Defs) {
1582 Register DupReg = Dup->getOperand(Idx).getReg();
1583 MRI->replaceRegWith(Reg, DupReg);
1584 MRI->clearKillFlags(DupReg);
1585
1586 if (->use_nodbg_empty(DupReg))
1587 Dup->getOperand(Idx).setIsDead(false);
1588 }
1589
1590 MI->eraseFromParent();
1591 ++NumCSEed;
1592 return true;
1593 }
1594 return false;
1595}
1596
1597
1598
1600 if (MI->mayLoad() && ->isDereferenceableInvariantLoad())
1601 return false;
1602
1603 unsigned Opcode = MI->getOpcode();
1604 for (auto &Map : CSEMap) {
1605
1608 Map.second.find(Opcode);
1609
1610
1611 if (CI == Map.second.end() || MI->isImplicitDef())
1612 continue;
1613 if (LookForDuplicate(MI, CI->second) != nullptr)
1614 return true;
1615 }
1616 }
1617
1618 return false;
1619}
1620
1621
1622
1623
1627
1628
1631 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1632 ++NumNotHoistedDueToHotness;
1633 return HoistResult::NotHoisted;
1634 }
1635
1636 bool HasExtractHoistableLoad = false;
1637 if (!IsLoopInvariantInst(*MI, CurLoop) ||
1638 !IsProfitableToHoist(*MI, CurLoop)) {
1639
1640 MI = ExtractHoistableLoad(MI, CurLoop);
1641 if ()
1642 return HoistResult::NotHoisted;
1643 HasExtractHoistableLoad = true;
1644 }
1645
1646
1647
1648 if (MI->mayStore())
1649 NumStoreConst++;
1650
1651
1652
1654 dbgs() << "Hoisting " << *MI;
1655 if (MI->getParent()->getBasicBlock())
1659 dbgs() << "\n";
1660 });
1661
1662
1663
1664 if (FirstInLoop) {
1665 InitCSEMap(Preheader);
1666 FirstInLoop = false;
1667 }
1668
1669
1670 unsigned Opcode = MI->getOpcode();
1671 bool HasCSEDone = false;
1672 for (auto &Map : CSEMap) {
1673
1676 Map.second.find(Opcode);
1677 if (CI != Map.second.end()) {
1678 if (EliminateCSE(MI, CI)) {
1679 HasCSEDone = true;
1680 break;
1681 }
1682 }
1683 }
1684 }
1685
1686 if (!HasCSEDone) {
1687
1689
1690
1691
1692
1693 assert(->isDebugInstr() && "Should not hoist debug inst");
1695
1696
1697 UpdateBackTraceRegPressure(MI);
1698
1699
1700
1701
1704 MRI->clearKillFlags(MO.getReg());
1705
1706 CSEMap[Preheader][Opcode].push_back(MI);
1707 }
1708
1709 ++NumHoisted;
1710 Changed = true;
1711
1712 if (HasCSEDone || HasExtractHoistableLoad)
1713 return HoistResult::Hoisted | HoistResult::ErasedMI;
1714 return HoistResult::Hoisted;
1715}
1716
1717
1719MachineLICMImpl::getCurPreheader(MachineLoop *CurLoop,
1721
1722
1723
1724
1726 return nullptr;
1727
1728 if (!CurPreheader) {
1730 if (!CurPreheader) {
1732 if (!Pred) {
1734 return nullptr;
1735 }
1736
1738 MFAM, nullptr, MDTU);
1739 if (!CurPreheader) {
1741 return nullptr;
1742 }
1743 }
1744 }
1745 return CurPreheader;
1746}
1747
1748
1749
1750bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1752
1755
1756
1757 if (!SrcBF)
1758 return true;
1759
1760 double Ratio = (double)DstBF / SrcBF;
1761
1762
1764}
1765
1766template <typename DerivedT, bool PreRegAlloc>
1769 bool Changed = MachineLICMImpl(PreRegAlloc, nullptr, &MFAM).run(MF);
1770 if (!Changed)
1774 return PA;
1775}
1776
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
This file implements the BitVector class.
COFF::MachineTypes Machine
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)
Return true if the specified block is in the list.
Machine Loop Invariant Code false early machinelicm
static cl::opt< bool > HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden)
#define GET_RESULT(RESULT, GETTER, INFIX)
static cl::opt< UseBFI > DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", cl::desc("Disable hoisting instructions to" " hotter blocks"), cl::init(UseBFI::PGO), cl::Hidden, cl::values(clEnumValN(UseBFI::None, "none", "disable the feature"), clEnumValN(UseBFI::PGO, "pgo", "enable the feature when using profile data"), clEnumValN(UseBFI::All, "all", "enable the feature with/wo profile data")))
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool.
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)
static cl::opt< bool > HoistConstLoads("hoist-const-loads", cl::desc("Hoist invariant loads"), cl::init(true), cl::Hidden)
Machine Loop Invariant Code Motion
static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)
static bool InstructionStoresToFI(const MachineInstr *MI, int FI)
Return true if instruction stores to the specified frame.
static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI)
static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI, BitVector &RUs, const uint32_t *Mask)
static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)
static bool isInvariantStore(const MachineInstr &MI, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI)
static cl::opt< unsigned > BlockFrequencyRatioThreshold("block-freq-ratio-threshold", cl::desc("Do not hoist instructions if target" "block is N times hotter than the source."), cl::init(100), cl::Hidden)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AAResults & getAAResults()
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool test(unsigned Idx) const
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Base class for the actual dominator tree node.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
A specialized PseudoSourceValue for holding FixedStack values, which must include a frame index.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Describe properties that are true of each instruction in the target description file.
bool isValid() const
Returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
unsigned pred_size() const
MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P, std::vector< SparseBitVector<> > *LiveInSets=nullptr, MachineDomTreeUpdater *MDTU=nullptr)
Split the critical edge from this block to the given successor block, and return the newly created bl...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
pred_iterator pred_begin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void eraseAdditionalCallInfo(const MachineInstr *MI)
Following functions update call site info.
Representation of each machine instruction.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Analysis pass that exposes the MachineLoopInfo for a machine function.
bool isLoopInvariant(MachineInstr &I, const Register ExcludeReg=0) const
Returns true if the instruction is loop invariant.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Special value supplied for machine level alias analysis.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void initializeMachineLICMPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
char & MachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeEarlyMachineLICMPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...