LLVM: lib/Transforms/Scalar/LoopInterchange.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
48#include
49#include
50#include
51
52using namespace llvm;
53
54#define DEBUG_TYPE "loop-interchange"
55
56STATISTIC(LoopsInterchanged, "Number of loops interchanged");
57
60 cl::desc("Interchange if you gain more than this number"));
61
62
66 "Maximum number of load-store instructions that should be handled "
67 "in the dependency matrix. Higher value may lead to more interchanges "
68 "at the cost of compile-time"));
69
70namespace {
71
73
74
75
76
77
78
79
80
81
82using CharMatrix = std::vector<std::vector>;
83
84
85enum class RuleTy {
86 PerLoopCacheAnalysis,
87 PerInstrOrderCost,
88 ForVectorization,
90};
91
92}
93
94
97 cl::desc("Minimum depth of loop nest considered for the transform"));
98
99
102 cl::desc("Maximum depth of loop nest considered for the transform"));
103
104
108 cl::desc("List of profitability heuristics to be used. They are applied in "
109 "the given order"),
111 RuleTy::PerInstrOrderCost,
112 RuleTy::ForVectorization}),
114 "Prioritize loop cache cost"),
115 clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
116 "Prioritize the IVs order of each instruction"),
117 clEnumValN(RuleTy::ForVectorization, "vectorize",
118 "Prioritize vectorization"),
120 "Ignore profitability, force interchange (does not "
121 "work with other options)")));
122
123#ifndef NDEBUG
126 for (RuleTy Rule : Rules) {
127 if (!Set.insert(Rule).second)
128 return false;
129 if (Rule == RuleTy::Ignore)
130 return false;
131 }
132 return true;
133}
134
136 for (auto &Row : DepMatrix) {
137
138
142 }
143}
144
145
146
147
149 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
150 "Expected Src and Dst to be different instructions in the same BB");
151
152 bool FoundSrc = false;
153 for (const Instruction &I : *(Src->getParent())) {
154 if (&I == Src) {
155 FoundSrc = true;
156 continue;
157 }
158 if (&I == Dst)
159 return FoundSrc;
160 }
161
163}
164#endif
165
171
173
174
176
179 return false;
181 if (!Ld->isSimple())
182 return false;
185 if (!St->isSimple())
186 return false;
188 }
189 }
190 }
191
193 << " Loads and Stores to analyze\n");
195 LLVM_DEBUG(dbgs() << "The transform doesn't support more than "
197 ORE->emit([&]() {
199 L->getStartLoc(), L->getHeader())
200 << "Number of loads/stores exceeded, the supported maximum "
201 "can be increased with option "
202 "-loop-interchange-maxmeminstr-count.";
203 });
204 return false;
205 }
207
208
209
211
212 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
213 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
214 std::vector Dep;
217
219 continue;
220
221 if (auto D = DI->depends(Src, Dst)) {
222 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
223
224
225 if (D->normalize(SE))
226 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
228 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
229 dbgs() << "Found " << DepType
230 << " dependency between Src and Dst\n"
231 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
232 unsigned Levels = D->getLevels();
234 for (unsigned II = 1; II <= Levels; ++II) {
235
236
237
238
239
240
241 unsigned Dir = D->getDirection(II);
248 else
251 }
252
253
254
255 if (D->isConfused()) {
256 assert(Dep.empty() && "Expected empty dependency vector");
257 Dep.assign(Level, '*');
258 }
259
260 while (Dep.size() != Level) {
261 Dep.push_back('I');
262 }
263
264
265
266 if (all_of(Dep, [](char C) { return C == '*'; })) {
267 ORE->emit([&]() {
269 L->getStartLoc(), L->getHeader())
270 << "All loops have dependencies in all directions.";
271 });
272 return false;
273 }
274
275
276 bool IsKnownForward = true;
277 if (Src->getParent() != Dst->getParent()) {
278
279
280
281 IsKnownForward = false;
282 } else {
283
284
285
287 "Unexpected instructions");
288
289
290
291
292 bool IsReversed = D->getSrc() != Src;
293 if (IsReversed)
294 IsKnownForward = false;
295 }
296
297
298
299 Dep.push_back('<');
300
301
302
303
304
306 StringRef(Dep.data(), Dep.size() - 1), DepMatrix.size());
307
308
309 if (Inserted)
310 DepMatrix.push_back(Dep);
311
312
313
314
315
316 if (!IsKnownForward)
317 DepMatrix[Ite->second].back() = '*';
318 }
319 }
320 }
321
322 return true;
323}
324
325
326
328 unsigned ToIndx) {
329 for (auto &Row : DepMatrix)
330 std::swap(Row[ToIndx], Row[FromIndx]);
331}
332
333
334
335
336
337
338static std::optional
340 for (unsigned char Direction : DV.slice(Begin, End - Begin)) {
342 return true;
344 return false;
345 }
346 return std::nullopt;
347}
348
349
351 unsigned InnerLoopId,
352 unsigned OuterLoopId) {
353 unsigned NumRows = DepMatrix.size();
354 std::vector Cur;
355
356 for (unsigned Row = 0; Row < NumRows; ++Row) {
357
358
359 Cur = DepMatrix[Row];
360
361
362
363
365 continue;
366
367
368
369
371 return false;
372 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
374 return false;
375 }
376 return true;
377}
378
380 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
381 << L.getHeader()->getParent()->getName() << " Loop: %"
382 << L.getHeader()->getName() << '\n');
383 assert(LoopList.empty() && "LoopList should initially be empty!");
384 Loop *CurrentLoop = &L;
385 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
386 while (!Vec->empty()) {
387
388
389
390 if (Vec->size() != 1) {
391 LoopList = {};
392 return;
393 }
394
396 CurrentLoop = Vec->front();
398 }
400}
401
404 unsigned LoopNestDepth = LoopList.size();
405 if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
406 LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
409 Loop *OuterLoop = LoopList.front();
410 ORE.emit([&]() {
414 << "Unsupported depth of loop nest, the supported range is ["
417 });
418 return false;
419 }
420 return true;
421}
422
425 for (Loop *L : LoopList) {
428 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
429 return false;
430 }
431 if (L->getNumBackEdges() != 1) {
432 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
433 return false;
434 }
435 if (!L->getExitingBlock()) {
436 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
437 return false;
438 }
439 }
440 return true;
441}
442
443namespace {
444
445
446class LoopInterchangeLegality {
447public:
448 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
449 OptimizationRemarkEmitter *ORE)
450 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
451
452
453 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
454 CharMatrix &DepMatrix);
455
456
457
458 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
459
460
461
462 bool isLoopStructureUnderstood();
463
464 bool currentLimitations();
465
466 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
467 return OuterInnerReductions;
468 }
469
471 return InnerLoopInductions;
472 }
473
475 return HasNoWrapReductions;
476 }
477
478private:
479 bool tightlyNested(Loop *Outer, Loop *Inner);
480 bool containsUnsafeInstructions(BasicBlock *BB);
481
482
483
484
485
486 bool findInductionAndReductions(Loop *L,
488 Loop *InnerLoop);
489
490 Loop *OuterLoop;
491 Loop *InnerLoop;
492
493 ScalarEvolution *SE;
494
495
496 OptimizationRemarkEmitter *ORE;
497
498
499
500 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
501
502
504
505
506
507
508 SmallVector<Instruction *, 4> HasNoWrapReductions;
509};
510
511
512
513
514class CacheCostManager {
515 Loop *OutermostLoop;
516 LoopStandardAnalysisResults *AR;
517 DependenceInfo *DI;
518
519
520
521 std::optional<std::unique_ptr> CC;
522
523
524
525 DenseMap<const Loop *, unsigned> CostMap;
526
527 void computeIfUnitinialized();
528
529public:
530 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
531 DependenceInfo *DI)
532 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
533 CacheCost *getCacheCost();
534 const DenseMap<const Loop *, unsigned> &getCostMap();
535};
536
537
538
539class LoopInterchangeProfitability {
540public:
541 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
542 OptimizationRemarkEmitter *ORE)
543 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
544
545
546 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
547 unsigned InnerLoopId, unsigned OuterLoopId,
548 CharMatrix &DepMatrix, CacheCostManager &CCM);
549
550private:
551 int getInstrOrderCost();
552 std::optional isProfitablePerLoopCacheAnalysis(
553 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
554 std::optional isProfitablePerInstrOrderCost();
555 std::optional isProfitableForVectorization(unsigned InnerLoopId,
556 unsigned OuterLoopId,
557 CharMatrix &DepMatrix);
558 Loop *OuterLoop;
559 Loop *InnerLoop;
560
561
562 ScalarEvolution *SE;
563
564
565 OptimizationRemarkEmitter *ORE;
566};
567
568
569class LoopInterchangeTransform {
570public:
571 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
572 LoopInfo *LI, DominatorTree *DT,
573 const LoopInterchangeLegality &LIL)
574 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
575
576
578 void restructureLoops(Loop *NewInner, Loop *NewOuter,
579 BasicBlock *OrigInnerPreHeader,
580 BasicBlock *OrigOuterPreHeader);
581 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
582
583private:
584 bool adjustLoopLinks();
585 bool adjustLoopBranches();
586
587 Loop *OuterLoop;
588 Loop *InnerLoop;
589
590
591 ScalarEvolution *SE;
592
593 LoopInfo *LI;
594 DominatorTree *DT;
595
596 const LoopInterchangeLegality &LIL;
597};
598
599struct LoopInterchange {
600 ScalarEvolution *SE = nullptr;
601 LoopInfo *LI = nullptr;
602 DependenceInfo *DI = nullptr;
603 DominatorTree *DT = nullptr;
604 LoopStandardAnalysisResults *AR = nullptr;
605
606
607 OptimizationRemarkEmitter *ORE;
608
609 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
610 DominatorTree *DT, LoopStandardAnalysisResults *AR,
611 OptimizationRemarkEmitter *ORE)
612 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
613
614 bool run(Loop *L) {
615 if (L->getParentLoop())
616 return false;
617 SmallVector<Loop *, 8> LoopList;
619 return processLoopList(LoopList);
620 }
621
622 bool run(LoopNest &LN) {
623 SmallVector<Loop *, 8> LoopList(LN.getLoops());
624 for (unsigned I = 1; I < LoopList.size(); ++I)
625 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
626 return false;
627 return processLoopList(LoopList);
628 }
629
630 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
631
632
633 return LoopList.size() - 1;
634 }
635
636 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
638
639
641 "Unsupported depth of loop nest.");
642
643 unsigned LoopNestDepth = LoopList.size();
644
645 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
646 << "\n");
647
648 CharMatrix DependencyMatrix;
649 Loop *OuterMostLoop = *(LoopList.begin());
651 OuterMostLoop, DI, SE, ORE)) {
652 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
653 return false;
654 }
655
656 LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
658
659
661 if (!LoopNestExit) {
662 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
663 return false;
664 }
665
666 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
667 CacheCostManager CCM(LoopList[0], AR, DI);
668
669
670
671
672 for (unsigned j = SelecLoopId; j > 0; j--) {
673 bool ChangedPerIter = false;
674 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
675 bool Interchanged =
676 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
677 ChangedPerIter |= Interchanged;
679 }
680
681
682 if (!ChangedPerIter)
683 break;
684 }
686 }
687
688 bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
689 unsigned OuterLoopId,
690 std::vector<std::vector> &DependencyMatrix,
691 CacheCostManager &CCM) {
692 Loop *OuterLoop = LoopList[OuterLoopId];
693 Loop *InnerLoop = LoopList[InnerLoopId];
694 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
695 << " and OuterLoopId = " << OuterLoopId << "\n");
696 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
697 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
698 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
699 return false;
700 }
701 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
702 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
703 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
704 DependencyMatrix, CCM)) {
705 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
706 return false;
707 }
708
709 ORE->emit([&]() {
710 return OptimizationRemark(DEBUG_TYPE, "Interchanged",
713 << "Loop interchanged with enclosing loop.";
714 });
715
716 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
717 LIT.transform(LIL.getHasNoWrapReductions());
719 LoopsInterchanged++;
720
722
723
724 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
725
727
728 LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
730
731 return true;
732 }
733};
734
735}
736
737bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
738 return any_of(*BB, [](const Instruction &I) {
739 return I.mayHaveSideEffects() || I.mayReadFromMemory();
740 });
741}
742
743bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
747
748 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
749
750
751
752
753 BranchInst *OuterLoopHeaderBI =
755 if (!OuterLoopHeaderBI)
756 return false;
757
758 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
759 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
760 Succ != OuterLoopLatch)
761 return false;
762
763 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
764
765
766 if (containsUnsafeInstructions(OuterLoopHeader) ||
767 containsUnsafeInstructions(OuterLoopLatch))
768 return false;
769
770
771
772
773 if (InnerLoopPreHeader != OuterLoopHeader &&
774 containsUnsafeInstructions(InnerLoopPreHeader))
775 return false;
776
778
779
782 if (&SuccInner != OuterLoopLatch) {
783 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
784 << " does not lead to the outer loop latch.\n";);
785 return false;
786 }
787
788
789
790 if (containsUnsafeInstructions(InnerLoopExit))
791 return false;
792
793 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
794
795 return true;
796}
797
798bool LoopInterchangeLegality::isLoopStructureUnderstood() {
800 for (PHINode *InnerInduction : InnerLoopInductions) {
801 unsigned Num = InnerInduction->getNumOperands();
802 for (unsigned i = 0; i < Num; ++i) {
803 Value *Val = InnerInduction->getOperand(i);
805 continue;
807 if ()
808 return false;
809
810
811
813 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
814 InnerLoopPreheader &&
816 return false;
817 }
818 }
819 }
820
821
822
823
824
825
826
828 BranchInst *InnerLoopLatchBI =
831 return false;
832 if (CmpInst *InnerLoopCmp =
834 Value *Op0 = InnerLoopCmp->getOperand(0);
835 Value *Op1 = InnerLoopCmp->getOperand(1);
836
837
838
841
842
843
844
845
846 std::function<bool(Value *)> IsPathToInnerIndVar;
847 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
849 return true;
851 return true;
853 if ()
854 return false;
856 return IsPathToInnerIndVar(I->getOperand(0));
858 return IsPathToInnerIndVar(I->getOperand(0)) &&
859 IsPathToInnerIndVar(I->getOperand(1));
860 return false;
861 };
862
863
864
865 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
866 return true;
867
868
869
870 if (IsPathToInnerIndVar(Op0) && (Op0)) {
873 } else if (IsPathToInnerIndVar(Op1) && (Op1)) {
876 }
877
878 if (Left == nullptr)
879 return false;
880
883 return false;
884 }
885
886 return true;
887}
888
889
890
893 if ()
894 return SV;
895
896 if (PHI->getNumIncomingValues() != 1)
897 return SV;
899}
900
901
902static PHINode *
905
907 return nullptr;
908
911 if (PHI->getNumIncomingValues() == 1)
912 continue;
915
917 return nullptr;
918
920 switch (RK) {
938 return PHI;
939
940
941
942
943
944
945
946
947
948
949
950
951
952
957
958
959 if (Ops.empty())
960 return nullptr;
961
963 assert(I->getOpcode() == OpCode &&
964 "Expected the instruction to be the reduction operation");
965 (void)OpCode;
966
967
968
969 if (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())
971 }
972 return PHI;
973 }
974
975 default:
976 return nullptr;
977 }
978 }
979 return nullptr;
980 }
981 }
982
983 return nullptr;
984}
985
986bool LoopInterchangeLegality::findInductionAndReductions(
988 if (->getLoopLatch() ||
->getLoopPredecessor())
989 return false;
990 for (PHINode &PHI : L->getHeader()->phis()) {
991 InductionDescriptor ID;
994 else {
995
996
997 if (!InnerLoop) {
998 if (!OuterInnerReductions.count(&PHI)) {
999 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
1000 "across the outer loop.\n");
1001 return false;
1002 }
1003 } else {
1004 assert(PHI.getNumIncomingValues() == 2 &&
1005 "Phis in loop header should have exactly 2 incoming values");
1006
1007
1009 PHINode *InnerRedPhi =
1011 if (!InnerRedPhi ||
1015 << "Failed to recognize PHI as an induction or reduction.\n");
1016 return false;
1017 }
1018 OuterInnerReductions.insert(&PHI);
1019 OuterInnerReductions.insert(InnerRedPhi);
1020 }
1021 }
1022 }
1023 return true;
1024}
1025
1026
1027
1028bool LoopInterchangeLegality::currentLimitations() {
1030
1031
1032
1038 dbgs() << "Loops where the latch is not the exiting block are not"
1039 << " supported currently.\n");
1040 ORE->emit([&]() {
1041 return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
1044 << "Loops where the latch is not the exiting block cannot be"
1045 " interchange currently.";
1046 });
1047 return true;
1048 }
1049
1051 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1053 dbgs() << "Only outer loops with induction or reduction PHI nodes "
1054 << "are supported currently.\n");
1055 ORE->emit([&]() {
1056 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
1059 << "Only outer loops with induction or reduction PHI nodes can be"
1060 " interchanged currently.";
1061 });
1062 return true;
1063 }
1064
1065 Inductions.clear();
1066
1067
1068
1069 Loop *CurLevelLoop = OuterLoop;
1070 while (!CurLevelLoop->getSubLoops().empty()) {
1071
1072 CurLevelLoop = CurLevelLoop->getSubLoops().front();
1073 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
1075 dbgs() << "Only inner loops with induction or reduction PHI nodes "
1076 << "are supported currently.\n");
1077 ORE->emit([&]() {
1078 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
1081 << "Only inner loops with induction or reduction PHI nodes can be"
1082 " interchange currently.";
1083 });
1084 return true;
1085 }
1086 }
1087
1088
1089 if (!isLoopStructureUnderstood()) {
1090 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
1091 ORE->emit([&]() {
1092 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
1095 << "Inner loop structure not understood currently.";
1096 });
1097 return true;
1098 }
1099
1100 return false;
1101}
1102
1103bool LoopInterchangeLegality::findInductions(
1104 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1105 for (PHINode &PHI : L->getHeader()->phis()) {
1106 InductionDescriptor ID;
1109 }
1110 return !Inductions.empty();
1111}
1112
1113
1114
1115
1116static bool
1121
1122 if (PHI.getNumIncomingValues() > 1)
1123 return false;
1125 PHINode *PN = dyn_cast(U);
1126 return !PN ||
1127 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
1128 })) {
1129 return false;
1130 }
1131 }
1132 return true;
1133}
1134
1135
1136
1137
1138
1139
1140
1141
1148 continue;
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1161 return false;
1162 }
1163 }
1164 return true;
1165}
1166
1167
1168
1169
1170
1171
1172
1173
1174
1177 return true;
1178
1179
1180
1182 return true;
1183
1184
1185
1186
1187
1188
1189
1192 for (auto *U : PHI.users()) {
1194 if (InnerLoopLatch == UI->getParent())
1195 return false;
1196 }
1197 }
1198 return true;
1199}
1200
1201bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
1202 unsigned OuterLoopId,
1203 CharMatrix &DepMatrix) {
1205 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
1206 << " and OuterLoopId = " << OuterLoopId
1207 << " due to dependence\n");
1208 ORE->emit([&]() {
1209 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
1212 << "Cannot interchange loops due to dependences.";
1213 });
1214 return false;
1215 }
1216
1217 for (auto *BB : OuterLoop->blocks())
1220
1221 if (CI->onlyWritesMemory())
1222 continue;
1224 dbgs() << "Loops with call instructions cannot be interchanged "
1225 << "safely.");
1226 ORE->emit([&]() {
1227 return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
1228 CI->getDebugLoc(),
1229 CI->getParent())
1230 << "Cannot interchange loops due to call instruction.";
1231 });
1232
1233 return false;
1234 }
1235
1236 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1237 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
1238 return false;
1239 }
1240
1242 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1243 ORE->emit([&]() {
1244 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1247 << "Cannot interchange loops because unsupported PHI nodes found "
1248 "in inner loop latch.";
1249 });
1250 return false;
1251 }
1252
1253
1254
1255 if (currentLimitations()) {
1256 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1257 return false;
1258 }
1259
1260
1261 if (!tightlyNested(OuterLoop, InnerLoop)) {
1263 ORE->emit([&]() {
1264 return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1267 << "Cannot interchange loops because they are not tightly "
1268 "nested.";
1269 });
1270 return false;
1271 }
1272
1274 OuterInnerReductions)) {
1275 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1276 ORE->emit([&]() {
1277 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1280 << "Found unsupported PHI node in loop exit.";
1281 });
1282 return false;
1283 }
1284
1286 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1287 ORE->emit([&]() {
1288 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1291 << "Found unsupported PHI node in loop exit.";
1292 });
1293 return false;
1294 }
1295
1296 return true;
1297}
1298
1299void CacheCostManager::computeIfUnitinialized() {
1300 if (CC.has_value())
1301 return;
1302
1305
1306
1307
1308
1309
1310
1311
1312
1313 if (*CC != nullptr)
1314 for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts()))
1315 CostMap[Cost.first] = Idx;
1316}
1317
1318CacheCost *CacheCostManager::getCacheCost() {
1319 computeIfUnitinialized();
1320 return CC->get();
1321}
1322
1323const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1324 computeIfUnitinialized();
1325 return CostMap;
1326}
1327
1328int LoopInterchangeProfitability::getInstrOrderCost() {
1329 unsigned GoodOrder, BadOrder;
1330 BadOrder = GoodOrder = 0;
1331 for (BasicBlock *BB : InnerLoop->blocks()) {
1332 for (Instruction &Ins : *BB) {
1334 bool FoundInnerInduction = false;
1335 bool FoundOuterInduction = false;
1337
1339 continue;
1340
1341 const SCEV *OperandVal = SE->getSCEV(Op);
1343 if (!AR)
1344 continue;
1345
1346
1347
1348
1349
1350
1351 if (AR->getLoop() == InnerLoop) {
1352
1353
1354 FoundInnerInduction = true;
1355 if (FoundOuterInduction) {
1356 GoodOrder++;
1357 break;
1358 }
1359 }
1360
1361
1362
1363
1364
1365 if (AR->getLoop() == OuterLoop) {
1366
1367
1368 FoundOuterInduction = true;
1369 if (FoundInnerInduction) {
1370 BadOrder++;
1371 break;
1372 }
1373 }
1374 }
1375 }
1376 }
1377 }
1378 return GoodOrder - BadOrder;
1379}
1380
1381std::optional
1382LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1383 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1384
1385
1386
1387 auto InnerLoopIt = CostMap.find(InnerLoop);
1388 if (InnerLoopIt == CostMap.end())
1389 return std::nullopt;
1390 auto OuterLoopIt = CostMap.find(OuterLoop);
1391 if (OuterLoopIt == CostMap.end())
1392 return std::nullopt;
1393
1395 return std::nullopt;
1396 unsigned InnerIndex = InnerLoopIt->second;
1397 unsigned OuterIndex = OuterLoopIt->second;
1399 << ", OuterIndex = " << OuterIndex << "\n");
1400 assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1401 "numbers to each loop");
1402 return std::optional(InnerIndex < OuterIndex);
1403}
1404
1405std::optional
1406LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1407
1408
1409
1410 int Cost = getInstrOrderCost();
1413 return std::optional(true);
1414
1415 return std::nullopt;
1416}
1417
1418
1419static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
1420 for (const auto &Dep : DepMatrix) {
1421 char Dir = Dep[LoopId];
1422 char DepType = Dep.back();
1423 assert((DepType == '<' || DepType == '*') &&
1424 "Unexpected element in dependency vector");
1425
1426
1427 if (Dir == '=' || Dir == 'I')
1428 continue;
1429
1430
1431
1432
1433 if (Dir == '<' && DepType == '<')
1434 continue;
1435
1436
1437 return false;
1438 }
1439 return true;
1440}
1441
1442std::optional LoopInterchangeProfitability::isProfitableForVectorization(
1443 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1444
1445
1447 return false;
1448
1449
1450
1452 return true;
1453
1454
1455
1456
1457
1458
1459 return std::nullopt;
1460}
1461
1462bool LoopInterchangeProfitability::isProfitable(
1463 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1464 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1465
1466
1467
1468
1469
1470
1473 if (InnerBTC && InnerBTC->isZero()) {
1474 LLVM_DEBUG(dbgs() << "Inner loop back-edge isn't taken, rejecting "
1475 "single iteration loop\n");
1476 return false;
1477 }
1478 if (OuterBTC && OuterBTC->isZero()) {
1479 LLVM_DEBUG(dbgs() << "Outer loop back-edge isn't taken, rejecting "
1480 "single iteration loop\n");
1481 return false;
1482 }
1483
1484
1486 return true;
1488 "Duplicate rules and option 'ignore' are not allowed");
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498 std::optional shouldInterchange;
1500 switch (RT) {
1501 case RuleTy::PerLoopCacheAnalysis: {
1502 CacheCost *CC = CCM.getCacheCost();
1503 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1504 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1505 break;
1506 }
1507 case RuleTy::PerInstrOrderCost:
1508 shouldInterchange = isProfitablePerInstrOrderCost();
1509 break;
1510 case RuleTy::ForVectorization:
1511 shouldInterchange =
1512 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1513 break;
1514 case RuleTy::Ignore:
1515 llvm_unreachable("Option 'ignore' is not supported with other options");
1516 break;
1517 }
1518
1519
1520
1521 if (shouldInterchange.has_value())
1522 break;
1523 }
1524
1525 if (!shouldInterchange.has_value()) {
1526 ORE->emit([&]() {
1527 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1530 << "Insufficient information to calculate the cost of loop for "
1531 "interchange.";
1532 });
1533 return false;
1534 } else if (!shouldInterchange.value()) {
1535 ORE->emit([&]() {
1536 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1539 << "Interchanging loops is not considered to improve cache "
1540 "locality nor vectorization.";
1541 });
1542 return false;
1543 }
1544 return true;
1545}
1546
1547void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1548 Loop *InnerLoop) {
1549 for (Loop *L : *OuterLoop)
1550 if (L == InnerLoop) {
1551 OuterLoop->removeChildLoop(L);
1552 return;
1553 }
1555}
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580void LoopInterchangeTransform::restructureLoops(
1581 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1582 BasicBlock *OrigOuterPreHeader) {
1583 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1584
1585
1587 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1588
1589
1590 if (OuterLoopParent) {
1591
1592 removeChildLoop(OuterLoopParent, NewInner);
1593 removeChildLoop(NewInner, NewOuter);
1595 } else {
1596 removeChildLoop(NewInner, NewOuter);
1598 }
1602
1603
1604 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1605
1606
1607
1608 for (BasicBlock *BB : NewInner->blocks())
1611
1612
1613
1616 for (BasicBlock *BB : OrigInnerBBs) {
1617
1619 continue;
1620
1621 if (BB == OuterHeader || BB == OuterLatch)
1623 else
1625 }
1626
1627
1628
1630 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1631
1632
1634}
1635
1636bool LoopInterchangeTransform::transform(
1638 bool Transformed = false;
1639
1642 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1643 auto &InductionPHIs = LIL.getInnerLoopInductions();
1644 if (InductionPHIs.empty()) {
1645 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1646 return false;
1647 }
1648
1649 SmallVector<Instruction *, 8> InnerIndexVarList;
1650 for (PHINode *CurInductionPHI : InductionPHIs) {
1651 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1654 else
1657 }
1658
1659
1660
1661
1662
1666
1667 SmallSetVector<Instruction *, 4> WorkList;
1668 unsigned i = 0;
1669 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1670 for (; i < WorkList.size(); i++) {
1671
1672
1673 Instruction *NewI = WorkList[i]->clone();
1676 "Moving instructions with side-effects may change behavior of "
1677 "the loop nest!");
1681 UserI->getParent() == NewLatch ||
1683 U.set(NewI);
1684 }
1685
1686
1687 for (Value *Op : WorkList[i]->operands()) {
1689 if (!OpI ||
1692 continue;
1693 WorkList.insert(OpI);
1694 }
1695 }
1696 };
1697
1698
1701 ->getCondition());
1702 if (CondI)
1703 WorkList.insert(CondI);
1704 MoveInstructions();
1705 for (Instruction *InnerIndexVar : InnerIndexVarList)
1707 MoveInstructions();
1708 }
1709
1710
1715 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1716 }
1717
1718
1719
1720
1721
1722
1724 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1725 if (InnerLoopPreHeader != OuterLoopHeader) {
1726 for (Instruction &I :
1728 std::prev(InnerLoopPreHeader->end()))))
1730 }
1731
1732 Transformed |= adjustLoopLinks();
1733 if (!Transformed) {
1735 return false;
1736 }
1737
1738
1739
1740 for (Instruction *Reduction : DropNoWrapInsts) {
1741 Reduction->setHasNoSignedWrap(false);
1742 Reduction->setHasNoUnsignedWrap(false);
1743 }
1744
1745 return true;
1746}
1747
1748
1749
1756
1757
1759
1760
1764 I->removeFromParent();
1765
1766
1768
1769
1772}
1773
1774
1775
1776
1779 std::vectorDominatorTree::UpdateType &DTUpdates,
1780 bool MustUpdateOnce = true) {
1782 "BI must jump to OldBB exactly once.");
1785 if (Op == OldBB) {
1786 Op.set(NewBB);
1788 }
1789
1791 DTUpdates.push_back(
1792 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1793 DTUpdates.push_back(
1794 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1795 }
1796 assert(Changed && "Expected a successor to be updated");
1797}
1798
1799
1804
1805
1806
1807
1808
1809
1810
1811
1813 assert(P.getNumIncomingValues() == 1 &&
1814 "Only loops with a single exit are supported!");
1815
1816
1817 auto IncI = cast(P.getIncomingValueForBlock(InnerLatch));
1818
1819
1821
1822
1823 if (IncIInnerMost->getParent() != InnerLatch &&
1824 IncIInnerMost->getParent() != InnerHeader)
1825 continue;
1826
1828 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1829 return (cast(U)->getParent() == OuterHeader &&
1830 IncI->getParent() == InnerHeader) ||
1831 cast(U)->getParent() == OuterExit;
1832 }) &&
1833 "Can only replace phis iff the uses are in the loop nest exit or "
1834 "the incoming value is defined in the inner header (it will "
1835 "dominate all loop blocks after interchanging)");
1836 P.replaceAllUsesWith(IncI);
1837 P.eraseFromParent();
1838 }
1839
1842
1845
1846
1847
1848
1849
1850
1851 for (PHINode *P : LcssaInnerExit)
1853
1854
1855
1856 for (PHINode *P : LcssaInnerLatch)
1858
1859
1860
1861
1862
1863 if (OuterExit) {
1865 if (P.getNumIncomingValues() != 1)
1866 continue;
1867
1868
1870 if ( || LI->getLoopFor(I->getParent()) == InnerLoop)
1871 continue;
1872
1876
1877
1878 for (auto *Pred : predecessors(InnerLatch)) {
1879 if (Pred == OuterLatch)
1880 continue;
1881 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1882 }
1884 P.setIncomingValue(0, NewPhi);
1885 }
1886 }
1887
1888
1889
1890
1892}
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1922
1923
1924 if (OuterLoopLatch == InnerLoopExit)
1925 return;
1926
1927
1930 for (PHINode *Phi : Phis) {
1931 assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");
1932 LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi
1933 << "\n");
1934 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
1935 Phi->eraseFromParent();
1936 }
1937}
1938
1939bool LoopInterchangeTransform::adjustLoopBranches() {
1940 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1941 std::vectorDominatorTree::UpdateType DTUpdates;
1942
1943 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1945
1946 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1947 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1948 InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1949
1951
1952
1953
1954
1955
1958 OuterLoopPreHeader =
1960 if (InnerLoopPreHeader == OuterLoop->getHeader())
1961 InnerLoopPreHeader =
1963
1964
1966 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1968 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1970 BasicBlock *InnerLoopLatchPredecessor =
1972 BasicBlock *InnerLoopLatchSuccessor;
1973 BasicBlock *OuterLoopLatchSuccessor;
1974
1975 BranchInst *OuterLoopLatchBI =
1977 BranchInst *InnerLoopLatchBI =
1979 BranchInst *OuterLoopHeaderBI =
1981 BranchInst *InnerLoopHeaderBI =
1983
1984 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1985 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1986 !InnerLoopHeaderBI)
1987 return false;
1988
1989 BranchInst *InnerLoopLatchPredecessorBI =
1991 BranchInst *OuterLoopPredecessorBI =
1993
1994 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1995 return false;
1997 if (!InnerLoopHeaderSuccessor)
1998 return false;
1999
2000
2001
2002
2003
2004 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
2005 InnerLoopPreHeader, DTUpdates, false);
2006
2007
2009
2010 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
2011 DTUpdates,
2012 false);
2013 }
2015 InnerLoopHeaderSuccessor, DTUpdates,
2016 false);
2017
2018
2020 OuterLoopHeader);
2021
2022 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
2023 OuterLoopPreHeader, DTUpdates);
2024
2025
2026 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
2027 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
2028 else
2029 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
2030
2031 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
2032 InnerLoopLatchSuccessor, DTUpdates);
2033
2034 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
2035 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
2036 else
2037 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
2038
2039 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
2040 OuterLoopLatchSuccessor, DTUpdates);
2041 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
2042 DTUpdates);
2043
2045 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
2046 OuterLoopPreHeader);
2047
2048 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
2049 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
2050 InnerLoop, LI);
2051
2052
2053 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
2054
2055 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
2056
2058 for (PHINode &PHI : InnerLoopHeader->phis())
2059 if (OuterInnerReductions.contains(&PHI))
2061
2062 for (PHINode &PHI : OuterLoopHeader->phis())
2063 if (OuterInnerReductions.contains(&PHI))
2065
2066
2067
2068
2069 for (PHINode *PHI : OuterLoopPHIs) {
2070 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
2072 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
2073 }
2074 for (PHINode *PHI : InnerLoopPHIs) {
2075 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
2077 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
2078 }
2079
2080
2081 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
2082 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
2083 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
2084 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
2085
2086
2087
2088
2089
2090 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2091 for (Instruction &I :
2092 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
2095
2096 return true;
2097}
2098
2099bool LoopInterchangeTransform::adjustLoopLinks() {
2100
2101 bool Changed = adjustLoopBranches();
2103
2104
2105
2106 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2108 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
2109 }
2111}
2112
2119
2121 LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
2123 }
2125
2126
2129
2131 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
2133 }
2134
2135 ORE.emit([&]() {
2139 << "Computed dependence info, invoking the transform.";
2140 });
2141
2143 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
2145 U.markLoopNestChanged(true);
2147}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Loop::LoopBounds::Direction Direction
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
Definition LoopInterchange.cpp:903
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
Definition LoopInterchange.cpp:423
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition LoopInterchange.cpp:1142
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
Definition LoopInterchange.cpp:1919
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
Definition LoopInterchange.cpp:327
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
Definition LoopInterchange.cpp:1800
static void printDepMatrix(CharMatrix &DepMatrix)
Definition LoopInterchange.cpp:135
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
Definition LoopInterchange.cpp:1758
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
Definition LoopInterchange.cpp:402
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
Definition LoopInterchange.cpp:148
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
Definition LoopInterchange.cpp:1175
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
Definition LoopInterchange.cpp:1419
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Definition LoopInterchange.cpp:350
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
Definition LoopInterchange.cpp:1117
#define DEBUG_TYPE
Definition LoopInterchange.cpp:54
static Value * followLCSSA(Value *SV)
Definition LoopInterchange.cpp:891
static void populateWorklist(Loop &L, LoopVector &LoopList)
Definition LoopInterchange.cpp:379
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
Definition LoopInterchange.cpp:166
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
Definition LoopInterchange.cpp:1777
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
Definition LoopInterchange.cpp:339
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
Definition LoopInterchange.cpp:124
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
const Loop * getLoop() const
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
iterator_range< user_iterator > users()
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition LoopInterchange.cpp:2113
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...