LLVM: lib/Transforms/Scalar/LoopFuse.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
67
68using namespace llvm;
69
70#define DEBUG_TYPE "loop-fusion"
71
73STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
74STATISTIC(InvalidPreheader, "Loop has invalid preheader");
75STATISTIC(InvalidHeader, "Loop has invalid header");
76STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
77STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
78STATISTIC(InvalidLatch, "Loop has invalid latch");
80STATISTIC(AddressTakenBB, "Basic block has address taken");
81STATISTIC(MayThrowException, "Loop may throw an exception");
82STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
83STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
84STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
85STATISTIC(UnknownTripCount, "Loop has unknown trip count");
86STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
87STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
88STATISTIC(NonAdjacent, "Loops are not adjacent");
90 NonEmptyPreheader,
91 "Loop has a non-empty preheader with instructions that cannot be moved");
92STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
93STATISTIC(NonIdenticalGuards, "Candidates have different guards");
94STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "
95 "instructions that cannot be moved");
96STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
97 "instructions that cannot be moved");
98STATISTIC(NotRotated, "Candidate is not rotated");
100 "The second candidate is guarded while the first one is not");
101STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
102STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
104
110
112 "loop-fusion-dependence-analysis",
113 cl::desc("Which dependence analysis should loop fusion use?"),
115 "Use the scalar evolution interface"),
117 "Use the dependence analysis interface"),
119 "Use all available analyses")),
121
124 cl::desc("Max number of iterations to be peeled from a loop, such that "
125 "fusion can take place"));
126
127#ifndef NDEBUG
130 cl::desc("Enable verbose debugging for Loop Fusion"),
132#endif
133
134namespace {
135
136
137
138
139
140
141
142
143
144
145struct FusionCandidate {
146
147
148
149
150
152
154
156
158
160
162
164
166
167 bool Valid;
168
170
172
173 bool AbleToPeel;
174
175 bool Peeled;
176
177
178
179
180
183
185
188 : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
189 ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
190 Latch(L->getLoopLatch()), L(L), Valid(true),
191 GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
192 Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
193
194
195
196
197
199 if (BB->hasAddressTaken()) {
200 invalidate();
201 reportInvalidCandidate(AddressTakenBB);
202 return;
203 }
204
206 if (I.mayThrow()) {
207 invalidate();
209 return;
210 }
212 if (SI->isVolatile()) {
213 invalidate();
215 return;
216 }
217 }
219 if (LI->isVolatile()) {
220 invalidate();
222 return;
223 }
224 }
225 if (I.mayWriteToMemory())
226 MemWrites.push_back(&I);
227 if (I.mayReadFromMemory())
228 MemReads.push_back(&I);
229 }
230 }
231 }
232
233
235 return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
236 ->isInvalid() && Valid;
237 }
238
239
240 void verify() const {
242 assert(->isInvalid() && "Loop is invalid!");
243 assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
244 assert(Header == L->getHeader() && "Header is out of sync");
245 assert(ExitingBlock == L->getExitingBlock() &&
246 "Exiting Blocks is out of sync");
247 assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
248 assert(Latch == L->getLoopLatch() && "Latch is out of sync");
249 }
250
251
252
253
254
255
257 if (GuardBranch)
259 else
260 return Preheader;
261 }
262
263
264
265 void updateAfterPeeling() {
266 Preheader = L->getLoopPreheader();
267 Header = L->getHeader();
268 ExitingBlock = L->getExitingBlock();
269 ExitBlock = L->getExitBlock();
270 Latch = L->getLoopLatch();
272 }
273
274
275
276
277
278
279
280
281 BasicBlock *getNonLoopBlock() const {
282 assert(GuardBranch && "Only valid on guarded loops.");
284 "Expecting guard to be a conditional branch.");
285 if (Peeled)
287 return (GuardBranch->getSuccessor(0) == Preheader)
290 }
291
292#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
294 dbgs() << "\tGuardBranch: ";
295 if (GuardBranch)
296 dbgs() << *GuardBranch;
297 else
298 dbgs() << "nullptr";
299 dbgs() << "\n"
300 << (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"
301 << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
302 << "\n"
303 << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
304 << "\tExitingBB: "
305 << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
306 << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
307 << "\n"
308 << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"
309 << "\tEntryBlock: "
310 << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr")
311 << "\n";
312 }
313#endif
314
315
316
317
320 LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
321 if (!Preheader)
322 ++InvalidPreheader;
323 if (!Header)
324 ++InvalidHeader;
325 if (!ExitingBlock)
326 ++InvalidExitingBlock;
327 if (!ExitBlock)
328 ++InvalidExitBlock;
329 if (!Latch)
330 ++InvalidLatch;
331 if (L->isInvalid())
332 ++InvalidLoop;
333
334 return false;
335 }
336
337
340 << " trip count not computable!\n");
342 }
343
344 if (->isLoopSimplifyForm()) {
346 << " is not in simplified form!\n");
348 }
349
350 if (->isRotatedForm()) {
351 LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");
353 }
354
355 return true;
356 }
357
358private:
359
360
361
362
363
364
365 void invalidate() {
366 MemWrites.clear();
367 MemReads.clear();
369 }
370
372 using namespace ore;
373 assert(L && Preheader && "Fusion candidate not initialized properly!");
374#if LLVM_ENABLE_STATS
375 ++Stat;
377 L->getStartLoc(), Preheader)
379 << "Loop is not a candidate for fusion: " << Stat.getDesc());
380#endif
381 return false;
382 }
383};
384
385struct FusionCandidateCompare {
386
387
388
389
390
391
392
393
394
395
396 bool operator()(const FusionCandidate &LHS,
397 const FusionCandidate &RHS) const {
398 const DominatorTree *DT = &(LHS.DT);
399
402
403
404
405 assert(DT && LHS.PDT && "Expecting valid dominator tree");
406
407
408 if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
409
410
411 assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
412 return false;
413 }
414
415 if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
416
417 assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
418 return true;
419 }
420
421
422
423
424
425 bool WrongOrder =
427 bool RightOrder =
429 if (WrongOrder && RightOrder) {
430
431
432
433 DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);
434 DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);
436 } else if (WrongOrder)
437 return false;
438 else if (RightOrder)
439 return true;
440
441
442
443
445 "No dominance relationship between these fusion candidates!");
446 }
447};
448}
449
451
452
453
454
455
456
457
458
459
460
461
464
465#ifndef NDEBUG
467 dbgs() << "****************************\n";
468 for (const Loop *L : LV)
470 dbgs() << "****************************\n";
471}
472
474 if (FC.isValid())
475 OS << FC.Preheader->getName();
476 else
477 OS << "";
478
479 return OS;
480}
481
484 for (const FusionCandidate &FC : CandSet)
485 OS << FC << '\n';
486
487 return OS;
488}
489
490static void
492 dbgs() << "Fusion Candidates: \n";
493 for (const auto &CandidateSet : FusionCandidates) {
494 dbgs() << "*** Fusion Candidate Set ***\n";
495 dbgs() << CandidateSet;
496 dbgs() << "****************************\n";
497 }
498}
499#endif
500
501namespace {
502
503
504
505
506
507
508
509struct LoopDepthTree {
510 using LoopsOnLevelTy = SmallVector<LoopVector, 4>;
513
514 LoopDepthTree(LoopInfo &LI) : Depth(1) {
517 }
518
519
520
521 bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }
522
523
524
525 void removeLoop(const Loop *L) { RemovedLoops.insert(L); }
526
527
528 void descend() {
529 LoopsOnLevelTy LoopsOnNextLevel;
530
532 for (Loop *L : LV)
533 if (!isRemovedLoop(L) && L->begin() != L->end())
534 LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));
535
536 LoopsOnLevel = LoopsOnNextLevel;
537 RemovedLoops.clear();
538 Depth++;
539 }
540
541 bool empty() const { return size() == 0; }
542 size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
543 unsigned getDepth() const { return Depth; }
544
545 iterator begin() { return LoopsOnLevel.begin(); }
546 iterator end() { return LoopsOnLevel.end(); }
547 const_iterator begin() const { return LoopsOnLevel.begin(); }
548 const_iterator end() const { return LoopsOnLevel.end(); }
549
550private:
551
552
553 SmallPtrSet<const Loop *, 8> RemovedLoops;
554
555
556 unsigned Depth;
557
558
559 LoopsOnLevelTy LoopsOnLevel;
560};
561
562struct LoopFuser {
563private:
564
566
567 LoopDepthTree LDT;
568 DomTreeUpdater DTU;
569
570 LoopInfo &LI;
571 DominatorTree &DT;
572 DependenceInfo &DI;
573 ScalarEvolution &SE;
574 PostDominatorTree &PDT;
575 OptimizationRemarkEmitter &ORE;
576 AssumptionCache &AC;
577 const TargetTransformInfo &TTI;
578
579public:
580 LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
581 ScalarEvolution &SE, PostDominatorTree &PDT,
582 OptimizationRemarkEmitter &ORE, const DataLayout &DL,
583 AssumptionCache &AC, const TargetTransformInfo &TTI)
584 : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
585 DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
586
587
588
589
590 bool fuseLoops(Function &F) {
591#ifndef NDEBUG
593 LI.print(dbgs());
594 }
595#endif
596
597 LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
598 << "\n");
600
601 while (!LDT.empty()) {
602 LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
603 << LDT.getDepth() << "\n";);
604
606 assert(LV.size() > 0 && "Empty loop set was build!");
607
608
609
610 if (LV.size() == 1)
611 continue;
612#ifndef NDEBUG
615 dbgs() << " Visit loop set (#" << LV.size() << "):\n";
617 });
618 }
619#endif
620
621 collectFusionCandidates(LV);
622 Changed |= fuseCandidates();
623 }
624
625
626
627
628
629
630
632 LDT.descend();
633 FusionCandidates.clear();
634 }
635
637 LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
638
639#ifndef NDEBUG
641 assert(PDT.verify());
642 LI.verify(DT);
643 SE.verify();
644#endif
645
648 }
649
650private:
651
652
653
654
655
656
658 const FusionCandidate &FC1) const {
659 assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
660
661 return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
662 DT, PDT);
663 }
664
665
666
667
668 void collectFusionCandidates(const LoopVector &LV) {
669 for (Loop *L : LV) {
672 FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);
673 if (!CurrCand.isEligibleForFusion(SE))
674 continue;
675
676
677
678
679
680
681 bool FoundSet = false;
682
683 for (auto &CurrCandSet : FusionCandidates) {
685 CurrCandSet.insert(CurrCand);
686 FoundSet = true;
687#ifndef NDEBUG
690 << " to existing candidate set\n");
691#endif
692 break;
693 }
694 }
695 if (!FoundSet) {
696
697#ifndef NDEBUG
699 LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
700#endif
702 NewCandSet.insert(CurrCand);
703 FusionCandidates.push_back(NewCandSet);
704 }
705 NumFusionCandidates++;
706 }
707 }
708
709
710
711
712
713
714 bool isBeneficialFusion(const FusionCandidate &FC0,
715 const FusionCandidate &FC1) {
716 return true;
717 }
718
719
720
721
722
723
724
725
726
727 std::pair<bool, std::optional>
728 haveIdenticalTripCounts(const FusionCandidate &FC0,
729 const FusionCandidate &FC1) const {
730 const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
732 UncomputableTripCount++;
733 LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
734 return {false, std::nullopt};
735 }
736
737 const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
739 UncomputableTripCount++;
740 LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
741 return {false, std::nullopt};
742 }
743
744 LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
745 << *TripCount1 << " are "
746 << (TripCount0 == TripCount1 ? "identical" : "different")
747 << "\n");
748
749 if (TripCount0 == TripCount1)
750 return {true, 0};
751
752 LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
753 "determining the difference between trip counts\n");
754
755
756
757 const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
758 const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
759
760
761
762 if (TC0 == 0 || TC1 == 0) {
763 LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
764 "have a constant number of iterations. Peeling "
765 "is not benefical\n");
766 return {false, std::nullopt};
767 }
768
769 std::optional Difference;
770 int Diff = TC0 - TC1;
771
772 if (Diff > 0)
773 Difference = Diff;
774 else {
776 dbgs() << "Difference is less than 0. FC1 (second loop) has more "
777 "iterations than the first one. Currently not supported\n");
778 }
779
780 LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
781 << "\n");
782
783 return {false, Difference};
784 }
785
786 void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
787 unsigned PeelCount) {
788 assert(FC0.AbleToPeel && "Should be able to peel loop");
789
790 LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
791 << " iterations of the first loop. \n");
792
794 FC0.Peeled =
795 peelLoop(FC0.L, PeelCount, false, &LI, &SE, DT, &AC, true, VMap);
796 if (FC0.Peeled) {
798
799#ifndef NDEBUG
800 auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
801
802 assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
803 "Loops should have identical trip counts after peeling");
804#endif
805
807
808
809 PDT.recalculate(*FC0.Preheader->getParent());
810
811 FC0.updateAfterPeeling();
812
813
814
815
816
817
818
819
820
822 FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
823 if (BB) {
825 SmallVector<Instruction *, 8> WorkList;
827 if (Pred != FC0.ExitBlock) {
828 WorkList.emplace_back(Pred->getTerminator());
830 DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));
831 }
832 }
833
834
835 for (Instruction *CurrentBranch : WorkList) {
836 BasicBlock *Succ = CurrentBranch->getSuccessor(0);
837 if (Succ == BB)
838 Succ = CurrentBranch->getSuccessor(1);
840 }
841
842 DTU.applyUpdates(TreeUpdates);
843 DTU.flush();
844 }
846 dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
847 << " iterations from the first loop.\n"
848 "Both Loops have the same number of iterations now.\n");
849 }
850 }
851
852
853
854
855
856
857
858
859 bool fuseCandidates() {
860 bool Fused = false;
862 for (auto &CandidateSet : FusionCandidates) {
863 if (CandidateSet.size() < 2)
864 continue;
865
866 LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
867 << CandidateSet << "\n");
868
869 for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
870 assert(!LDT.isRemovedLoop(FC0->L) &&
871 "Should not have removed loops in CandidateSet!");
872 auto FC1 = FC0;
873 for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
874 assert(!LDT.isRemovedLoop(FC1->L) &&
875 "Should not have removed loops in CandidateSet!");
876
877 LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
878 dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
879
880 FC0->verify();
881 FC1->verify();
882
883
884
885
886
887
888 std::pair<bool, std::optional> IdenticalTripCountRes =
889 haveIdenticalTripCounts(*FC0, *FC1);
890 bool SameTripCount = IdenticalTripCountRes.first;
891 std::optional TCDifference = IdenticalTripCountRes.second;
892
893
894
895 if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
898 << "Difference in loop trip counts: " << *TCDifference
899 << " is greater than maximum peel count specificed: "
901 } else {
902
903
904 SameTripCount = true;
905 }
906 }
907
908 if (!SameTripCount) {
909 LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
910 "counts. Not fusing.\n");
911 reportLoopFusion(*FC0, *FC1,
912 NonEqualTripCount);
913 continue;
914 }
915
916 if (!isAdjacent(*FC0, *FC1)) {
918 << "Fusion candidates are not adjacent. Not fusing.\n");
919 reportLoopFusion(*FC0, *FC1, NonAdjacent);
920 continue;
921 }
922
923 if ((!FC0->GuardBranch && FC1->GuardBranch) ||
924 (FC0->GuardBranch && !FC1->GuardBranch)) {
925 LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
926 "another one is not. Not fusing.\n");
927 reportLoopFusion(
928 *FC0, *FC1, OnlySecondCandidateIsGuarded);
929 continue;
930 }
931
932
933
934 if (FC0->GuardBranch && FC1->GuardBranch &&
935 !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
936 LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
937 "guards. Not Fusing.\n");
938 reportLoopFusion(*FC0, *FC1,
939 NonIdenticalGuards);
940 continue;
941 }
942
943 if (FC0->GuardBranch) {
944 assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
945
948 &PDT, &DI)) {
949 LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
950 "instructions in exit block. Not fusing.\n");
951 reportLoopFusion(*FC0, *FC1,
952 NonEmptyExitBlock);
953 continue;
954 }
955
958 *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
959 &DI)) {
961 << "Fusion candidate contains unsafe "
962 "instructions in guard block. Not fusing.\n");
963 reportLoopFusion(*FC0, *FC1,
964 NonEmptyGuardBlock);
965 continue;
966 }
967 }
968
969
970
971 if (!dependencesAllowFusion(*FC0, *FC1)) {
972 LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
973 reportLoopFusion(*FC0, *FC1,
974 InvalidDependencies);
975 continue;
976 }
977
978
979
980
981 SmallVector<Instruction *, 4> SafeToHoist;
982 SmallVector<Instruction *, 4> SafeToSink;
983
984
985
986 if (!isEmptyPreheader(*FC1)) {
987 LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
988 "preheader.\n");
989
990
991
992 if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
993 SafeToSink)) {
994 LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
995 "Fusion Candidate Pre-header.\n"
996 << "Not Fusing.\n");
997 reportLoopFusion(*FC0, *FC1,
998 NonEmptyPreheader);
999 continue;
1000 }
1001 }
1002
1003 bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
1005 << "\tFusion appears to be "
1006 << (BeneficialToFuse ? "" : "un") << "profitable!\n");
1007 if (!BeneficialToFuse) {
1008 reportLoopFusion(*FC0, *FC1,
1009 FusionNotBeneficial);
1010 continue;
1011 }
1012
1013
1014
1015
1016
1017 movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
1018
1019 LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
1020 << *FC1 << "\n");
1021
1022 FusionCandidate FC0Copy = *FC0;
1023
1024
1025 bool Peel = TCDifference && *TCDifference > 0;
1026 if (Peel)
1027 peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
1028
1029
1030
1031
1032
1033 reportLoopFusion((Peel ? FC0Copy : *FC0), *FC1,
1034 FuseCounter);
1035
1036 FusionCandidate FusedCand(
1037 performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
1038 FC0Copy.PP);
1039 FusedCand.verify();
1040 assert(FusedCand.isEligibleForFusion(SE) &&
1041 "Fused candidate should be eligible for fusion!");
1042
1043
1044 LDT.removeLoop(FC1->L);
1045
1046 CandidateSet.erase(FC0);
1047 CandidateSet.erase(FC1);
1048
1049 auto InsertPos = CandidateSet.insert(FusedCand);
1050
1051 assert(InsertPos.second &&
1052 "Unable to insert TargetCandidate in CandidateSet!");
1053
1054
1055
1056
1057 FC0 = FC1 = InsertPos.first;
1058
1059 LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
1060 << "\n");
1061
1062 Fused = true;
1063 }
1064 }
1065 }
1066 return Fused;
1067 }
1068
1069
1070
1071
1072
1073
1074 bool canHoistInst(Instruction &I,
1075 const SmallVector<Instruction *, 4> &SafeToHoist,
1076 const SmallVector<Instruction *, 4> &NotHoisting,
1077 const FusionCandidate &FC0) const {
1079 assert(FC0PreheaderTarget &&
1080 "Expected single successor for loop preheader.");
1081
1082 for (Use &Op : I.operands()) {
1084 bool OpHoisted = is_contained(SafeToHoist, OpInst);
1085
1086
1087 if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
1088 return false;
1089 }
1090 }
1091 }
1092
1093
1094
1096 return false;
1097
1098
1099 if (.mayReadOrWriteMemory())
1100 return true;
1101
1102 LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n");
1103 for (Instruction *NotHoistedInst : NotHoisting) {
1104 if (auto D = DI.depends(&I, NotHoistedInst)) {
1105
1106
1107 if (D->isFlow() || D->isAnti() || D->isOutput()) {
1108 LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's "
1109 "preheader that is not being hoisted.\n");
1110 return false;
1111 }
1112 }
1113 }
1114
1115 for (Instruction *ReadInst : FC0.MemReads) {
1116 if (auto D = DI.depends(ReadInst, &I)) {
1117
1118 if (D->isAnti()) {
1119 LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n");
1120 return false;
1121 }
1122 }
1123 }
1124
1125 for (Instruction *WriteInst : FC0.MemWrites) {
1126 if (auto D = DI.depends(WriteInst, &I)) {
1127
1128 if (D->isFlow() || D->isOutput()) {
1129 LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n");
1130 return false;
1131 }
1132 }
1133 }
1134 return true;
1135 }
1136
1137
1138
1139
1140 bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const {
1141 for (User *U : I.users()) {
1143
1144
1146
1147
1148 return false;
1149 }
1150 }
1151 }
1152
1153
1154 if (.mayReadOrWriteMemory())
1155 return true;
1156
1157 for (Instruction *ReadInst : FC1.MemReads) {
1158 if (auto D = DI.depends(&I, ReadInst)) {
1159
1160 if (D->isFlow()) {
1161 LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n");
1162 return false;
1163 }
1164 }
1165 }
1166
1167 for (Instruction *WriteInst : FC1.MemWrites) {
1168 if (auto D = DI.depends(&I, WriteInst)) {
1169
1170 if (D->isOutput() || D->isAnti()) {
1171 LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n");
1172 return false;
1173 }
1174 }
1175 }
1176
1177 return true;
1178 }
1179
1180
1181
1182
1183
1184
1186 const FusionCandidate &FC0,
1187 const FusionCandidate &FC1) const {
1188 for (Instruction *Inst : SafeToSink) {
1189
1191 if (!Phi)
1192 continue;
1193 for (unsigned I = 0; I < Phi->getNumIncomingValues(); I++) {
1194 if (Phi->getIncomingBlock(I) != FC0.Latch)
1195 continue;
1196 assert(FC1.Latch && "FC1 latch is not set");
1197 Phi->setIncomingBlock(I, FC1.Latch);
1198 }
1199 }
1200 }
1201
1202
1203
1204 bool collectMovablePreheaderInsts(
1205 const FusionCandidate &FC0, const FusionCandidate &FC1,
1206 SmallVector<Instruction *, 4> &SafeToHoist,
1207 SmallVector<Instruction *, 4> &SafeToSink) const {
1208 BasicBlock *FC1Preheader = FC1.Preheader;
1209
1210
1211 SmallVector<Instruction *, 4> NotHoisting;
1212
1213 for (Instruction &I : *FC1Preheader) {
1214
1215 if (&I == FC1Preheader->getTerminator())
1216 continue;
1217
1218
1219
1220
1221 if (I.mayThrow() || .willReturn()) {
1222 LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n");
1223 return false;
1224 }
1225
1227
1228 if (I.isAtomic() || I.isVolatile()) {
1230 dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n");
1231 return false;
1232 }
1233
1234 if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) {
1237 } else {
1238 LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
1240
1241 if (canSinkInst(I, FC1)) {
1244 } else {
1246 return false;
1247 }
1248 }
1249 }
1251 dbgs() << "All preheader instructions could be sunk or hoisted!\n");
1252 return true;
1253 }
1254
1255
1256 class AddRecLoopReplacer : public SCEVRewriteVisitor {
1257 public:
1258 AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
1259 bool UseMax = true)
1260 : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),
1261 NewL(NewL) {}
1262
1263 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
1264 const Loop *ExprL = Expr->getLoop();
1266 if (ExprL == &OldL) {
1269 }
1270
1271 if (OldL.contains(ExprL)) {
1273 if (!UseMax || !Pos || !Expr->isAffine()) {
1274 Valid = false;
1275 return Expr;
1276 }
1278 }
1279
1280 for (const SCEV *Op : Expr->operands())
1283 }
1284
1285 bool wasValidSCEV() const { return Valid; }
1286
1287 private:
1288 bool Valid, UseMax;
1289 const Loop &OldL, &NewL;
1290 };
1291
1292
1293
1294 bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
1295 Instruction &I1, bool EqualIsInvalid) {
1298 if (!Ptr0 || !Ptr1)
1299 return false;
1300
1301 const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);
1302 const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);
1303#ifndef NDEBUG
1305 LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs "
1306 << *SCEVPtr1 << "\n");
1307#endif
1308 AddRecLoopReplacer Rewriter(SE, L0, L1);
1309 SCEVPtr0 = Rewriter.visit(SCEVPtr0);
1310#ifndef NDEBUG
1312 LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0
1313 << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
1314#endif
1315 if (.wasValidSCEV())
1316 return false;
1317
1318
1319
1320
1321
1323 auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
1325 if (!AddRec)
1326 return false;
1327 return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&
1329 };
1330 if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))
1331 return false;
1332
1333 ICmpInst::Predicate Pred =
1334 EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
1335 bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);
1336#ifndef NDEBUG
1339 << (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1
1340 << "\n");
1341#endif
1342 return IsAlwaysGE;
1343 }
1344
1345
1346
1347
1348 bool dependencesAllowFusion(const FusionCandidate &FC0,
1349 const FusionCandidate &FC1, Instruction &I0,
1350 Instruction &I1, bool AnyDep,
1352#ifndef NDEBUG
1354 LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
1355 << DepChoice << "\n");
1356 }
1357#endif
1358 switch (DepChoice) {
1360 return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);
1362 auto DepResult = DI.depends(&I0, &I1);
1363 if (!DepResult)
1364 return true;
1365#ifndef NDEBUG
1368 dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
1369 << (DepResult->isOrdered() ? "true" : "false")
1370 << "]\n");
1371 LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
1372 << "\n");
1373 }
1374#endif
1375 unsigned Levels = DepResult->getLevels();
1376 unsigned SameSDLevels = DepResult->getSameSDLevels();
1377 unsigned CurLoopLevel = FC0.L->getLoopDepth();
1378
1379
1380 if (CurLoopLevel > Levels + SameSDLevels)
1381 return false;
1382
1383
1384 for (unsigned Level = 1; Level <= std::min(CurLoopLevel - 1, Levels);
1386 unsigned Direction = DepResult->getDirection(Level, false);
1387
1388
1389
1390
1392 LLVM_DEBUG(dbgs() << "Safe to fuse due to non-equal acceses in the "
1393 "outer loops\n");
1394 NumDA++;
1395 return true;
1396 }
1397 }
1398
1399 assert(CurLoopLevel > Levels && "Fusion candidates are not separated");
1400
1401 unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);
1402
1403
1404
1405
1406
1407
1408
1409
1411 LLVM_DEBUG(dbgs() << "Safe to fuse with no backward loop-carried "
1412 "dependency\n");
1413 NumDA++;
1414 return true;
1415 }
1416
1417 if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
1419 dbgs() << "TODO: Implement pred/succ dependence handling!\n");
1420
1421
1422 return false;
1423 }
1424
1426 return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
1428 dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
1430 }
1431
1432 llvm_unreachable("Unknown fusion dependence analysis choice!");
1433 }
1434
1435
1436 bool dependencesAllowFusion(const FusionCandidate &FC0,
1437 const FusionCandidate &FC1) {
1438 LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
1439 << "\n");
1441 assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()));
1442
1443 for (Instruction *WriteL0 : FC0.MemWrites) {
1444 for (Instruction *WriteL1 : FC1.MemWrites)
1445 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
1446 false,
1448 InvalidDependencies++;
1449 return false;
1450 }
1451 for (Instruction *ReadL1 : FC1.MemReads)
1452 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
1453 false,
1455 InvalidDependencies++;
1456 return false;
1457 }
1458 }
1459
1460 for (Instruction *WriteL1 : FC1.MemWrites) {
1461 for (Instruction *WriteL0 : FC0.MemWrites)
1462 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
1463 false,
1465 InvalidDependencies++;
1466 return false;
1467 }
1468 for (Instruction *ReadL0 : FC0.MemReads)
1469 if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
1470 false,
1472 InvalidDependencies++;
1473 return false;
1474 }
1475 }
1476
1477
1478
1479 for (BasicBlock *BB : FC1.L->blocks())
1480 for (Instruction &I : *BB)
1481 for (auto &Op : I.operands())
1483 if (FC0.L->contains(Def->getParent())) {
1484 InvalidDependencies++;
1485 return false;
1486 }
1487
1488 return true;
1489 }
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500 bool isAdjacent(const FusionCandidate &FC0,
1501 const FusionCandidate &FC1) const {
1502
1503 if (FC0.GuardBranch)
1504 return FC0.getNonLoopBlock() == FC1.getEntryBlock();
1505 else
1506 return FC0.ExitBlock == FC1.getEntryBlock();
1507 }
1508
1509 bool isEmptyPreheader(const FusionCandidate &FC) const {
1510 return FC.Preheader->size() == 1;
1511 }
1512
1513
1514
1515 void movePreheaderInsts(const FusionCandidate &FC0,
1516 const FusionCandidate &FC1,
1517 SmallVector<Instruction *, 4> &HoistInsts,
1518 SmallVector<Instruction *, 4> &SinkInsts) const {
1519
1520 assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&
1521 "Attempting to sink and hoist preheader instructions, but not all "
1522 "the preheader instructions are accounted for.");
1523
1524 NumHoistedInsts += HoistInsts.size();
1525 NumSunkInsts += SinkInsts.size();
1526
1528 if (!HoistInsts.empty())
1529 dbgs() << "Hoisting: \n";
1530 for (Instruction *I : HoistInsts)
1532 if (!SinkInsts.empty())
1533 dbgs() << "Sinking: \n";
1534 for (Instruction *I : SinkInsts)
1536 });
1537
1538 for (Instruction *I : HoistInsts) {
1539 assert(I->getParent() == FC1.Preheader);
1540 I->moveBefore(*FC0.Preheader,
1542 }
1543
1544 for (Instruction *I : reverse(SinkInsts)) {
1545 assert(I->getParent() == FC1.Preheader);
1547 }
1548
1549
1550 fixPHINodes(SinkInsts, FC0, FC1);
1551 }
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565 bool haveIdenticalGuards(const FusionCandidate &FC0,
1566 const FusionCandidate &FC1) const {
1567 assert(FC0.GuardBranch && FC1.GuardBranch &&
1568 "Expecting FC0 and FC1 to be guarded loops.");
1569
1570 if (auto FC0CmpInst =
1572 if (auto FC1CmpInst =
1574 if (!FC0CmpInst->isIdenticalTo(FC1CmpInst))
1575 return false;
1576
1577
1578
1579
1580 if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader)
1581 return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader);
1582 else
1583 return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
1584 }
1585
1586
1587
1588 void simplifyLatchBranch(const FusionCandidate &FC) const {
1590 if (FCLatchBranch) {
1593 "Expecting the two successors of FCLatchBranch to be the same");
1594 BranchInst *NewBranch =
1597 }
1598 }
1599
1600
1601
1602 void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1606 DTU.flush();
1607 }
1608 }
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639 Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1640 assert(FC0.isValid() && FC1.isValid() &&
1641 "Expecting valid fusion candidates");
1642
1643 LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
1644 dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
1645
1646
1647
1649
1650
1651
1652
1653 if (FC0.GuardBranch)
1654 return fuseGuardedLoops(FC0, FC1);
1655
1656 assert(FC1.Preheader ==
1657 (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
1658 assert(FC1.Preheader->size() == 1 &&
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1671 if (FC0.ExitingBlock != FC0.Latch)
1672 for (PHINode &PHI : FC0.Header->phis())
1674
1675
1678
1679
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699 if (!FC0.Peeled) {
1701 FC1.Header);
1702 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1703 DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
1704 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1705 DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1706 } else {
1707 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1708 DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
1709
1710
1712 FC1.Header);
1713 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1714 DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
1716 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1717 DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1718 new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
1719 }
1720
1721
1724 new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
1725 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1726 DominatorTree::Delete, FC1.Preheader, FC1.Header));
1727
1728
1730 if (SE.isSCEVable(PHI->getType()))
1731 SE.forgetValue(PHI);
1732 if (PHI->hasNUsesOrMore(1))
1734 else
1735 PHI->eraseFromParent();
1736 }
1737
1738
1739
1740
1741
1743 for (PHINode *LCPHI : OriginalFC0PHIs) {
1744 int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
1745 assert(L1LatchBBIdx >= 0 &&
1746 "Expected loop carried value to be rewired at this point!");
1747
1748 Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
1749
1750 PHINode *L1HeaderPHI =
1753 L1HeaderPHI->addIncoming(LCV, FC0.Latch);
1755 FC0.ExitingBlock);
1756
1757 LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
1758 }
1759
1760
1763
1764
1765
1766 simplifyLatchBranch(FC0);
1767
1768
1769
1770 if (FC0.Latch != FC0.ExitingBlock)
1771 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1772 DominatorTree::Insert, FC0.Latch, FC1.Header));
1773
1774 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
1775 FC0.Latch, FC0.Header));
1776 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
1777 FC1.Latch, FC0.Header));
1778 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
1779 FC1.Latch, FC1.Header));
1780
1781
1782 DTU.applyUpdates(TreeUpdates);
1783
1784 LI.removeBlock(FC1.Preheader);
1785 DTU.deleteBB(FC1.Preheader);
1786 if (FC0.Peeled) {
1787 LI.removeBlock(FC0.ExitBlock);
1788 DTU.deleteBB(FC0.ExitBlock);
1789 }
1790
1791 DTU.flush();
1792
1793
1794
1795
1796
1797 SE.forgetLoop(FC1.L);
1798 SE.forgetLoop(FC0.L);
1799
1800
1801
1802 mergeLatch(FC0, FC1);
1803
1804
1805
1806
1807 SE.forgetBlockAndLoopDispositions();
1808
1809
1810 SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
1811 for (BasicBlock *BB : Blocks) {
1814 if (LI.getLoopFor(BB) != FC1.L)
1815 continue;
1816 LI.changeLoopFor(BB, FC0.L);
1817 }
1819 const auto &ChildLoopIt = FC1.L->begin();
1820 Loop *ChildLoop = *ChildLoopIt;
1823 }
1824
1825
1826 LI.erase(FC1.L);
1827
1828#ifndef NDEBUG
1830 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1831 assert(PDT.verify());
1832 LI.verify(DT);
1833 SE.verify();
1834#endif
1835
1837
1838 return FC0.L;
1839 }
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853 template
1854 void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
1856 assert(FC0.Preheader && FC1.Preheader &&
1857 "Expecting valid fusion candidates");
1858 using namespace ore;
1859#if LLVM_ENABLE_STATS
1860 ++Stat;
1862 FC0.Preheader)
1864 << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
1865 << " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
1866 << ": " << Stat.getDesc());
1867#endif
1868 }
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885 Loop *fuseGuardedLoops(const FusionCandidate &FC0,
1886 const FusionCandidate &FC1) {
1887 assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops");
1888
1891 BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
1892 BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
1894
1895
1896
1897
1898
1900 (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
1901 DT, PDT, DI);
1902
1903
1904
1906
1907 assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
1908
1910
1911
1912
1913
1914
1915
1916
1917
1918
1920 FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
1921
1922 BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
1924
1925
1927 new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock);
1928
1929 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1930 DominatorTree::Delete, FC1GuardBlock, FC1.Preheader));
1931 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1932 DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));
1933 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1934 DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));
1935 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1936 DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
1937
1938 if (FC0.Peeled) {
1939
1940 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1941 DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
1943 new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
1944 FC0ExitBlockSuccessor);
1945 }
1946
1948 "Expecting guard block to have no predecessors");
1950 "Expecting guard block to have no successors");
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1965 if (FC0.ExitingBlock != FC0.Latch)
1966 for (PHINode &PHI : FC0.Header->phis())
1968
1969 assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!");
1970
1971
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1989 FC1.Header);
1990
1991 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1992 DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
1993 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1994 DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1995
1996
1997
1998
1999
2000
2001
2002
2003 assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");
2005 new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
2006
2007
2008
2011 new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
2012 TreeUpdates.emplace_back(DominatorTree::UpdateType(
2013 DominatorTree::Delete, FC1.Preheader, FC1.Header));
2014
2015
2017 if (SE.isSCEVable(PHI->getType()))
2018 SE.forgetValue(PHI);
2019 if (PHI->hasNUsesOrMore(1))
2021 else
2022 PHI->eraseFromParent();
2023 }
2024
2025
2026
2027
2028
2030 for (PHINode *LCPHI : OriginalFC0PHIs) {
2031 int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
2032 assert(L1LatchBBIdx >= 0 &&
2033 "Expected loop carried value to be rewired at this point!");
2034
2035 Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
2036
2037 PHINode *L1HeaderPHI =
2040 L1HeaderPHI->addIncoming(LCV, FC0.Latch);
2042 FC0.ExitingBlock);
2043
2044 LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
2045 }
2046
2047
2048
2049
2052
2053
2054
2055 simplifyLatchBranch(FC0);
2056
2057
2058
2059 if (FC0.Latch != FC0.ExitingBlock)
2060 TreeUpdates.emplace_back(DominatorTree::UpdateType(
2061 DominatorTree::Insert, FC0.Latch, FC1.Header));
2062
2063 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
2064 FC0.Latch, FC0.Header));
2065 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
2066 FC1.Latch, FC0.Header));
2067 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
2068 FC1.Latch, FC1.Header));
2069
2070
2071
2072
2073 assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");
2074 assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");
2075
2076
2077 DTU.applyUpdates(TreeUpdates);
2078
2079 LI.removeBlock(FC1GuardBlock);
2080 LI.removeBlock(FC1.Preheader);
2081 LI.removeBlock(FC0.ExitBlock);
2082 if (FC0.Peeled) {
2083 LI.removeBlock(FC0ExitBlockSuccessor);
2084 DTU.deleteBB(FC0ExitBlockSuccessor);
2085 }
2086 DTU.deleteBB(FC1GuardBlock);
2087 DTU.deleteBB(FC1.Preheader);
2088 DTU.deleteBB(FC0.ExitBlock);
2089 DTU.flush();
2090
2091
2092
2093
2094
2095 SE.forgetLoop(FC1.L);
2096 SE.forgetLoop(FC0.L);
2097
2098
2099
2100 mergeLatch(FC0, FC1);
2101
2102
2103
2104
2105 SE.forgetBlockAndLoopDispositions();
2106
2107
2108 SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
2109 for (BasicBlock *BB : Blocks) {
2112 if (LI.getLoopFor(BB) != FC1.L)
2113 continue;
2114 LI.changeLoopFor(BB, FC0.L);
2115 }
2117 const auto &ChildLoopIt = FC1.L->begin();
2118 Loop *ChildLoop = *ChildLoopIt;
2121 }
2122
2123
2124 LI.erase(FC1.L);
2125
2126#ifndef NDEBUG
2128 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2129 assert(PDT.verify());
2130 LI.verify(DT);
2131 SE.verify();
2132#endif
2133
2135
2136 return FC0.L;
2137 }
2138};
2139}
2140
2151
2152
2153
2154
2156 for (auto &L : LI) {
2158 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false );
2159 }
2161 PDT.recalculate(F);
2162
2163 LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
2167
2173 return PA;
2174}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool reportInvalidCandidate(const Instruction &I, llvm::Statistic &Stat)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static void printFusionCandidates(const FusionCandidateCollection &FusionCandidates)
Definition LoopFuse.cpp:491
static cl::opt< FusionDependenceAnalysisChoice > FusionDependenceAnalysis("loop-fusion-dependence-analysis", cl::desc("Which dependence analysis should loop fusion use?"), cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev", "Use the scalar evolution interface"), clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da", "Use the dependence analysis interface"), clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all", "Use all available analyses")), cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL))
static void printLoopVector(const LoopVector &LV)
Definition LoopFuse.cpp:466
std::set< FusionCandidate, FusionCandidateCompare > FusionCandidateSet
Definition LoopFuse.cpp:462
SmallVector< Loop *, 4 > LoopVector
Definition LoopFuse.cpp:450
SmallVector< FusionCandidateSet, 4 > FusionCandidateCollection
Definition LoopFuse.cpp:463
FusionDependenceAnalysisChoice
Definition LoopFuse.cpp:105
@ FUSION_DEPENDENCE_ANALYSIS_DA
Definition LoopFuse.cpp:107
@ FUSION_DEPENDENCE_ANALYSIS_ALL
Definition LoopFuse.cpp:108
@ FUSION_DEPENDENCE_ANALYSIS_SCEV
Definition LoopFuse.cpp:106
static cl::opt< bool > VerboseFusionDebugging("loop-fusion-verbose-debug", cl::desc("Enable verbose debugging for Loop Fusion"), cl::Hidden, cl::init(false))
static cl::opt< unsigned > FusionPeelMaxCount("loop-fusion-peel-max-count", cl::init(0), cl::Hidden, cl::desc("Max number of iterations to be peeled from a loop, such that " "fusion can take place"))
#define DEBUG_TYPE
Definition LoopFuse.cpp:70
This file implements the Loop Fusion pass.
Loop::LoopBounds::Direction Direction
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction & front() const
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
A parsed version of the target data layout string in and methods for querying it.
AnalysisPass to compute dependence information in a function.
unsigned getLevel() const
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Analysis pass that exposes the LoopInfo for a function.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition LoopFuse.cpp:2141
reverse_iterator rend() const
reverse_iterator rbegin() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
ArrayRef< const SCEV * > operands() const
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
@ Valid
The data is already valid.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
LLVM_ABI void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool succ_empty(const Instruction *I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI bool verifyFunction(const Function &F, raw_ostream *OS=nullptr)
Check a function for errors, useful for use when debugging a pass.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool canPeel(const Loop *L)
LLVM_ABI void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI)
Move instructions, in an order-preserving manner, from FromBB to the end of ToBB when proven safe.
DomTreeNodeBase< BasicBlock > DomTreeNode
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI bool isControlFlowEquivalent(const Instruction &I0, const Instruction &I1, const DominatorTree &DT, const PostDominatorTree &PDT)
Return true if I0 and I1 are control flow equivalent.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI bool nonStrictlyPostDominate(const BasicBlock *ThisBlock, const BasicBlock *OtherBlock, const DominatorTree *DT, const PostDominatorTree *PDT)
In case that two BBs ThisBlock and OtherBlock are control flow equivalent but they do not strictly do...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI)
Move instructions, in an order-preserving manner, from FromBB to the beginning of ToBB when proven sa...
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void printLoop(const Loop &L, raw_ostream &OS, const std::string &Banner="")
Function to print a loop's contents as LLVM's text IR assembly.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, DominatorTree &DT, const PostDominatorTree *PDT=nullptr, DependenceInfo *DI=nullptr, bool CheckForEntireBlock=false)
Return true if I can be safely moved before InsertPoint.
bool peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...