LLVM: lib/Transforms/Scalar/LoopUnrollPass.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
62#include
63#include
64#include
65#include
66#include
67#include
68#include
69#include
70
71using namespace llvm;
72
73#define DEBUG_TYPE "loop-unroll"
74
77 cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
78 " the current top-most loop. This is sometimes preferred to reduce"
79 " compile time."));
80
83 cl::desc("The cost threshold for loop unrolling"));
84
88 cl::desc("The cost threshold for loop unrolling when optimizing for "
89 "size"));
90
92 "unroll-partial-threshold", cl::Hidden,
93 cl::desc("The cost threshold for partial loop unrolling"));
94
97 cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
98 "to the threshold when aggressively unrolling a loop due to the "
99 "dynamic cost savings. If completely unrolling a loop will reduce "
100 "the total runtime from X to Y, we boost the loop unroll "
101 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
102 "X/Y). This limit avoids excessive code bloat."));
103
106 cl::desc("Don't allow loop unrolling to simulate more than this number of "
107 "iterations when checking full unroll profitability"));
108
111 cl::desc("Use this unroll count for all loops including those with "
112 "unroll_count pragma values, for testing purposes"));
113
116 cl::desc("Set the max unroll count for partial and runtime unrolling, for"
117 "testing purposes"));
118
122 "Set the max unroll count for full unrolling, for testing purposes"));
123
126 cl::desc("Allows loops to be partially unrolled until "
127 "-unroll-threshold loop size is reached."));
128
130 "unroll-allow-remainder", cl::Hidden,
131 cl::desc("Allow generation of a loop remainder (extra iterations) "
132 "when unrolling a loop."));
133
136 cl::desc("Unroll loops with run-time trip counts"));
137
141 "The max of trip count upper bound that is considered in unrolling"));
142
145 cl::desc("Unrolled size limit for loops with an unroll(full) or "
146 "unroll_count pragma."));
147
150 cl::desc("If the runtime tripcount for the loop is lower than the "
151 "threshold, the loop is considered as flat and will be less "
152 "aggressively unrolled."));
153
156 cl::desc("Allow the loop remainder to be unrolled."));
157
158
159
160
162 "unroll-revisit-child-loops", cl::Hidden,
163 cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
164 "This shouldn't typically be needed as child loops (or their "
165 "clones) were already visited."));
166
169 cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
170 "optimizations"));
174 cl::desc("Default threshold (max size of unrolled "
175 "loop), used in all but O3 optimizations"));
176
178 "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
179 cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
180
181
182
183
/// A magic value for use with the Threshold parameter: the largest value an
/// `unsigned` can represent.
// NOTE(review): presumably this means "unroll no matter how large the result
// gets" -- confirm against the code that consumes Threshold.
static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
185
186
187
192 std::optional UserThreshold, std::optional UserCount,
193 std::optional UserAllowPartial, std::optional UserRuntime,
194 std::optional UserUpperBound,
195 std::optional UserFullUnrollMaxCount) {
197
198
207 UP.MaxCount = std::numeric_limits::max();
216 UP.Force = false;
224
225
226 TTI.getUnrollingPreferences(L, SE, UP, &ORE);
227
228
229 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
230
234 if (OptForSize) {
238 }
239
240
265
266
267 if (UserThreshold) {
270 }
271 if (UserCount)
272 UP.Count = *UserCount;
273 if (UserAllowPartial)
274 UP.Partial = *UserAllowPartial;
275 if (UserRuntime)
276 UP.Runtime = *UserRuntime;
277 if (UserUpperBound)
279 if (UserFullUnrollMaxCount)
281
282 return UP;
283}
284
285namespace {
286
287
288
289
290
291
292
293struct UnrolledInstState {
295 int Iteration : 30;
296 unsigned IsFree : 1;
297 unsigned IsCounted : 1;
298};
299
300
301struct UnrolledInstStateKeyInfo {
302 using PtrInfo = DenseMapInfo<Instruction *>;
303 using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
304
305 static inline UnrolledInstState getEmptyKey() {
306 return {PtrInfo::getEmptyKey(), 0, 0, 0};
307 }
308
309 static inline UnrolledInstState getTombstoneKey() {
310 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
311 }
312
313 static inline unsigned getHashValue(const UnrolledInstState &S) {
314 return PairInfo::getHashValue({S.I, S.Iteration});
315 }
316
317 static inline bool isEqual(const UnrolledInstState &LHS,
318 const UnrolledInstState &RHS) {
319 return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
320 }
321};
322
/// Pair of cost figures describing a loop in unrolled and in rolled form.
///
/// Consumers in this file compare UnrolledCost against the (boosted) unroll
/// threshold and use the ratio RolledDynamicCost / UnrolledCost, expressed as
/// a percentage, as the boosting factor.
struct EstimatedUnrollCost {
  // NOTE(review): presumably the estimated cost of the loop after full
  // unrolling -- the producer's return statement is not visible here.
  unsigned UnrolledCost;

  // NOTE(review): presumably the estimated cost of executing every iteration
  // of the loop in its rolled form -- confirm against the producer.
  unsigned RolledDynamicCost;
};
331
/// Immutable snapshot of the explicit unroll directives that apply to a loop.
/// All fields are fixed at construction time.
struct PragmaInfo {
  /// Whether the unroll-count command-line option was given (the caller
  /// passes `UnrollCount.getNumOccurrences() > 0`).
  const bool UserUnrollCount;
  /// Whether full unrolling was requested.
  // NOTE(review): presumably derived from the unroll(full) pragma -- the
  // assignment is not visible here.
  const bool PragmaFullUnroll;
  /// Requested unroll count; consumers treat 0 as "no count requested".
  const unsigned PragmaCount;
  /// Whether unrolling was requested without a specific count.
  // NOTE(review): presumably derived from the unroll(enable) pragma --
  // confirm against the caller.
  const bool PragmaEnableUnroll;

  /// Stores the four directive values in declaration order.
  PragmaInfo(bool HasUserCount, bool HasFullPragma, unsigned CountFromPragma,
             bool HasEnablePragma)
      : UserUnrollCount(HasUserCount), PragmaFullUnroll(HasFullPragma),
        PragmaCount(CountFromPragma), PragmaEnableUnroll(HasEnablePragma) {}
};
341
342}
343
344
345
346
347
348
349
350
351
352
353
354
355
356
361 unsigned MaxIterationsCountToAnalyze) {
362
363
364
365 assert(MaxIterationsCountToAnalyze <
366 (unsigned)(std::numeric_limits::max() / 2) &&
367 "The unroll iterations max is too large!");
368
369
370
371 if (!L->isInnermost())
372 return std::nullopt;
373
374
375 if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
376 return std::nullopt;
377
382
383
384
386
387
388
389
390
391
393
394
395
396
397
399
400
401
403
404
406
407
408 auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
409 assert(Iteration >= 0 && "Cannot have a negative iteration!");
410 assert(CostWorklist.empty() && "Must start with an empty cost list");
411 assert(PHIUsedList.empty() && "Must start with an empty phi used list");
417 for (;; --Iteration) {
418 do {
420
421
422
423 auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
424 if (CostIter == InstCostMap.end())
425
426
427
428 continue;
429 auto &Cost = *CostIter;
430 if (Cost.IsCounted)
431
432 continue;
433
434
435 Cost.IsCounted = true;
436
437
439 if (PhiI->getParent() == L->getHeader()) {
440 assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
441 "inherently simplify during unrolling.");
442 if (Iteration == 0)
443 continue;
444
445
446
447
449 PhiI->getIncomingValueForBlock(L->getLoopLatch())))
450 if (L->contains(OpI))
452 continue;
453 }
454
455
456 if (!Cost.IsFree) {
457
459 transform(I->operands(), std::back_inserter(Operands),
461 if (auto Res = SimplifiedValues.lookup(Op))
462 return Res;
463 return Op;
464 });
465 UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
466 LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
467 << Iteration << "): ");
469 }
470
471
472
473
474 for (Value *Op : I->operands()) {
475
476
478 if (!OpI || !L->contains(OpI))
479 continue;
480
481
483 }
484 } while (!CostWorklist.empty());
485
486 if (PHIUsedList.empty())
487
488 break;
489
490 assert(Iteration > 0 &&
491 "Cannot track PHI-used values past the first iteration!");
492 CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
493 PHIUsedList.clear();
494 }
495 };
496
497
498
499 assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
500 assert(L->isLCSSAForm(DT) &&
501 "Must have loops in LCSSA form to track live-out values.");
502
503 LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
504
506 L->getHeader()->getParent()->hasMinSize() ?
508
509
510
511
512 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
513 LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
514
515
516
519 if ()
520 break;
521
522
523
525 PHI->getNumIncomingValues() == 2 &&
526 "Must have an incoming value only for the preheader and the latch.");
527
528 Value *V = PHI->getIncomingValueForBlock(
529 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
530 if (Iteration != 0 && SimplifiedValues.count(V))
531 V = SimplifiedValues.lookup(V);
533 }
534
535
536 SimplifiedValues.clear();
537 while (!SimplifiedInputValues.empty())
539
541
542 BBWorklist.clear();
543 BBWorklist.insert(L->getHeader());
544
545 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
547
548
549
550
552
553
555 continue;
556
557
558
559 RolledDynamicCost += TTI.getInstructionCost(&I, CostKind);
560
561
562
563
564 bool IsFree = Analyzer.visit(I);
565 bool Inserted = InstCostMap.insert({&I, (int)Iteration,
566 (unsigned)IsFree,
567 false}).second;
568 (void)Inserted;
569 assert(Inserted && "Cannot have a state for an unvisited instruction!");
570
571 if (IsFree)
572 continue;
573
574
575
577 const Function *Callee = CI->getCalledFunction();
578 if (!Callee || TTI.isLoweredToCall(Callee)) {
579 LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
580 return std::nullopt;
581 }
582 }
583
584
585
586 if (I.mayHaveSideEffects())
587 AddCostRecursively(I, Iteration);
588
589
590 if (UnrolledCost > MaxUnrolledLoopSize) {
591 LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
592 << " UnrolledCost: " << UnrolledCost
593 << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
594 << "\n");
595 return std::nullopt;
596 }
597 }
598
600
601 auto getSimplifiedConstant = [&](Value *V) -> Constant * {
602 if (SimplifiedValues.count(V))
603 V = SimplifiedValues.lookup(V);
605 };
606
607
608
611 if (BI->isConditional()) {
612 if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
613
615 KnownSucc = BI->getSuccessor(0);
618 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
619 }
620 }
622 if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
623
625 KnownSucc = SI->getSuccessor(0);
628 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
629 }
630 }
631 if (KnownSucc) {
632 if (L->contains(KnownSucc))
633 BBWorklist.insert(KnownSucc);
634 else
635 ExitWorklist.insert({BB, KnownSucc});
636 continue;
637 }
638
639
641 if (L->contains(Succ))
642 BBWorklist.insert(Succ);
643 else
644 ExitWorklist.insert({BB, Succ});
645 AddCostRecursively(*TI, Iteration);
646 }
647
648
649
650 if (UnrolledCost == RolledDynamicCost) {
651 LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
652 << " UnrolledCost: " << UnrolledCost << "\n");
653 return std::nullopt;
654 }
655 }
656
657 while (!ExitWorklist.empty()) {
659 std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
660
663 if (!PN)
664 break;
665
666 Value *Op = PN->getIncomingValueForBlock(ExitingBB);
668 if (L->contains(OpI))
669 AddCostRecursively(*OpI, TripCount - 1);
670 }
671 }
672
674 "All instructions must have a valid cost, whether the "
675 "loop is rolled or unrolled.");
676
678 << "UnrolledCost: " << UnrolledCost << ", "
679 << "RolledDynamicCost: " << RolledDynamicCost << "\n");
682}
683
689 Metrics.analyzeBasicBlock(BB, TTI, EphValues, false,
690 L);
692 NotDuplicatable = Metrics.notDuplicatable;
694 LoopSize = Metrics.NumInsts;
698
699
700
701
702
703
704
705 if (LoopSize.isValid() && LoopSize < BEInsns + 1)
706
707 LoopSize = BEInsns + 1;
708}
709
713 LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
714 return false;
715 default:
716 break;
717 }
718 if (!LoopSize.isValid()) {
719 LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
720 return false;
721 }
722 if (NotDuplicatable) {
723 LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
724 return false;
725 }
726 return true;
727}
728
731 unsigned CountOverwrite) const {
732 unsigned LS = LoopSize.getValue();
733 assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
734 if (CountOverwrite)
736 else
738}
739
740
741
742
744 if (MDNode *LoopID = L->getLoopID())
746 return nullptr;
747}
748
749
753
754
755
759
760
764
765
766
769 if (MD) {
771 "Unroll count hint metadata should have two operands.");
774 assert(Count >= 1 && "Unroll count must be positive.");
776 }
777 return 0;
778}
779
780
781
782
783
784
786 unsigned MaxPercentThresholdBoost) {
787 if (Cost.RolledDynamicCost >= std::numeric_limits::max() / 100)
788 return 100;
789 else if (Cost.UnrolledCost != 0)
790
791 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
792 MaxPercentThresholdBoost);
793 else
794 return MaxPercentThresholdBoost;
795}
796
797static std::optional
799 const unsigned TripMultiple, const unsigned TripCount,
802
803
804
805
806 if (PInfo.UserUnrollCount) {
810 }
811
812
813 if (PInfo.PragmaCount > 0) {
814 if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
815 return PInfo.PragmaCount;
816 }
817
818 if (PInfo.PragmaFullUnroll && TripCount != 0) {
819
820
821
823 LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
824 return std::nullopt;
825 }
826
827 return TripCount;
828 }
829
830 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
832 return MaxTripCount;
833
834
835 return std::nullopt;
836}
837
843 assert(FullUnrollTripCount && "should be non-zero!");
844
846 return std::nullopt;
847
848
849
851 return FullUnrollTripCount;
852
853
854
855
857 L, FullUnrollTripCount, DT, SE, EphValues, TTI,
860 unsigned Boost =
862 if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
863 return FullUnrollTripCount;
864 }
865 return std::nullopt;
866}
867
868static std::optional
872
873 if (!TripCount)
874 return std::nullopt;
875
877 LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
878 << "-unroll-allow-partial not given\n");
879 return 0;
880 }
883 count = TripCount;
885
891 while (count != 0 && TripCount % count != 0)
894
895
896
897
899 while (count != 0 &&
902 }
905 }
906 } else {
907 count = TripCount;
908 }
911
912 LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
913
915}
916
917
918
919
920
921
922
923
932
934
935 const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
939
940 const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
941 PragmaEnableUnroll || UserUnrollCount;
942
943 PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
944 PragmaEnableUnroll);
945
946
948 if (UnrollCount.getNumOccurrences() > 0) {
950 "explicit unroll count");
951 }
954 return true;
955 }
956
957
958
959 if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
960 MaxTripCount, UCE, UP)) {
961 UP.Count = *UnrollFactor;
962
963 if (UserUnrollCount || (PragmaCount > 0)) {
966 }
967 UP.Runtime |= (PragmaCount > 0);
968 return ExplicitUnroll;
969 } else {
970 if (ExplicitUnroll && TripCount != 0) {
971
972
973
977 }
978 }
979
980
981
983 if (TripCount) {
984 UP.Count = TripCount;
986 TripCount, UCE, UP)) {
987 UP.Count = *UnrollFactor;
988 UseUpperBound = false;
989 return ExplicitUnroll;
990 }
991 }
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005 if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
1007 UP.Count = MaxTripCount;
1009 MaxTripCount, UCE, UP)) {
1010 UP.Count = *UnrollFactor;
1011 UseUpperBound = true;
1012 return ExplicitUnroll;
1013 }
1014 }
1015
1016
1021 return ExplicitUnroll;
1022 }
1023
1024
1025
1026 if (TripCount)
1027 UP.Partial |= ExplicitUnroll;
1028
1029
1030
1031 if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
1032 UP.Count = *UnrollFactor;
1033
1034 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1035 UP.Count != TripCount)
1036 ORE->emit([&]() {
1038 "FullUnrollAsDirectedTooLarge",
1039 L->getStartLoc(), L->getHeader())
1040 << "Unable to fully unroll loop as directed by unroll pragma "
1041 "because "
1042 "unrolled size is too large.";
1043 });
1044
1046 if (UP.Count == 0) {
1047 if (PragmaEnableUnroll)
1048 ORE->emit([&]() {
1050 "UnrollAsDirectedTooLarge",
1051 L->getStartLoc(), L->getHeader())
1052 << "Unable to unroll loop as directed by unroll(enable) "
1053 "pragma "
1054 "because unrolled size is too large.";
1055 });
1056 }
1057 }
1058 return ExplicitUnroll;
1059 }
1060 assert(TripCount == 0 &&
1061 "All cases when TripCount is constant should be covered here.");
1062 if (PragmaFullUnroll)
1063 ORE->emit([&]() {
1065 DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
1066 L->getStartLoc(), L->getHeader())
1067 << "Unable to fully unroll loop as directed by unroll(full) "
1068 "pragma "
1069 "because loop has a runtime trip count.";
1070 });
1071
1072
1073
1076 return false;
1077 }
1078
1079
1082 return false;
1083 }
1084
1085
1086 if (L->getHeader()->getParent()->hasProfileData()) {
1089 return false;
1090 else
1092 }
1093 }
1094 UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1097 dbgs() << " will not try to unroll loop with runtime trip count "
1098 << "-unroll-runtime not given\n");
1100 return false;
1101 }
1102 if (UP.Count == 0)
1104
1105
1106
1107 while (UP.Count != 0 &&
1110
1111#ifndef NDEBUG
1112 unsigned OrigCount = UP.Count;
1113#endif
1114
1116 while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1119 dbgs() << "Remainder loop is restricted (that could architecture "
1120 "specific or because the loop contains a convergent "
1121 "instruction), so unroll count must divide the trip "
1122 "multiple, "
1123 << TripMultiple << ". Reducing unroll count from " << OrigCount
1124 << " to " << UP.Count << ".\n");
1125
1126 using namespace ore;
1127
1129 ORE->emit([&]() {
1131 "DifferentUnrollCountFromDirected",
1132 L->getStartLoc(), L->getHeader())
1133 << "Unable to unroll loop the number of times directed by "
1134 "unroll_count pragma because remainder loop is restricted "
1135 "(that could architecture specific or because the loop "
1136 "contains a convergent instruction) and so must have an "
1137 "unroll "
1138 "count that divides the loop trip multiple of "
1139 << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1140 << NV("UnrollCount", UP.Count) << " time(s).";
1141 });
1142 }
1143
1146
1147 if (MaxTripCount && UP.Count > MaxTripCount)
1148 UP.Count = MaxTripCount;
1149
1151 << "\n");
1152 if (UP.Count < 2)
1154 return ExplicitUnroll;
1155}
1156
1162 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
1163 std::optional ProvidedCount,
1164 std::optional ProvidedThreshold,
1165 std::optional ProvidedAllowPartial,
1166 std::optional ProvidedRuntime,
1167 std::optional ProvidedUpperBound,
1168 std::optional ProvidedAllowPeeling,
1169 std::optional ProvidedAllowProfileBasedPeeling,
1170 std::optional ProvidedFullUnrollMaxCount,
1172
1174 << L->getHeader()->getParent()->getName() << "] Loop %"
1175 << L->getHeader()->getName() << "\n");
1179
1180
1181
1182
1183
1184 Loop *ParentL = L->getParentLoop();
1185 if (ParentL != nullptr &&
1188 LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
1189 << " llvm.loop.unroll_and_jam.\n");
1191 }
1192
1193
1194
1195
1200 << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1202 }
1203
1204 if (!L->isLoopSimplifyForm()) {
1206 dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1208 }
1209
1210
1211
1212 if (OnlyWhenForced && !(TM & TM_Enable))
1214
1215 bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1217 L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1218 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1219 ProvidedFullUnrollMaxCount);
1221 L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1222
1223
1224
1226 !OptForSize)
1228
1231
1234 LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");
1236 }
1237
1239 LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1240
1241
1242
1243 if (OptForSize)
1245
1247 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1249 }
1250
1251
1252
1253
1254
1255
1256 unsigned TripCount = 0;
1257 unsigned TripMultiple = 1;
1259 L->getExitingBlocks(ExitingBlocks);
1260 for (BasicBlock *ExitingBlock : ExitingBlocks)
1262 if (!TripCount || TC < TripCount)
1263 TripCount = TripMultiple = TC;
1264
1265 if (!TripCount) {
1266
1267
1268
1269 BasicBlock *ExitingBlock = L->getLoopLatch();
1270 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1271 ExitingBlock = L->getExitingBlock();
1272 if (ExitingBlock)
1274 }
1275
1276
1277
1278
1279
1280
1281
1282
1284
1285
1286
1287 unsigned MaxTripCount = 0;
1288 bool MaxOrZero = false;
1289 if (!TripCount) {
1292 }
1293
1294
1295
1296 bool UseUpperBound = false;
1298 L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
1299 MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
1302
1304
1306 assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1307 LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1308 << " with iteration count " << PP.PeelCount << "!\n");
1309 ORE.emit([&]() {
1311 L->getHeader())
1313 << " iterations";
1314 });
1315
1318 VMap)) {
1320
1321
1323 L->setLoopAlreadyUnrolled();
1325 }
1327 }
1328
1329
1330 if (OnlyFullUnroll && ((!TripCount && !MaxTripCount) ||
1331 UP.Count < TripCount || UP.Count < MaxTripCount)) {
1333 dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1335 }
1336
1337
1338
1339
1340
1341
1342 UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1343
1344
1345 MDNode *OrigLoopID = L->getLoopID();
1346
1347
1348 Loop *RemainderLoop = nullptr;
1361 L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
1364
1365 if (RemainderLoop) {
1366 std::optional<MDNode *> RemainderLoopID =
1369 if (RemainderLoopID)
1370 RemainderLoop->setLoopID(*RemainderLoopID);
1371 }
1372
1374 std::optional<MDNode *> NewLoopID =
1377 if (NewLoopID) {
1378 L->setLoopID(*NewLoopID);
1379
1380
1381
1382 return UnrollResult;
1383 }
1384 }
1385
1386
1387
1389 L->setLoopAlreadyUnrolled();
1390
1391 return UnrollResult;
1392}
1393
1394namespace {
1395
1396class LoopUnroll : public LoopPass {
1397public:
1398 static char ID;
1399
1400 int OptLevel;
1401
1402
1403
1404
1405 bool OnlyWhenForced;
1406
1407
1408
1409
1410 bool ForgetAllSCEV;
1411
1412 std::optional ProvidedCount;
1413 std::optional ProvidedThreshold;
1414 std::optional ProvidedAllowPartial;
1415 std::optional ProvidedRuntime;
1416 std::optional ProvidedUpperBound;
1417 std::optional ProvidedAllowPeeling;
1418 std::optional ProvidedAllowProfileBasedPeeling;
1419 std::optional ProvidedFullUnrollMaxCount;
1420
1421 LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1422 bool ForgetAllSCEV = false,
1423 std::optional Threshold = std::nullopt,
1424 std::optional Count = std::nullopt,
1425 std::optional AllowPartial = std::nullopt,
1426 std::optional Runtime = std::nullopt,
1427 std::optional UpperBound = std::nullopt,
1428 std::optional AllowPeeling = std::nullopt,
1429 std::optional AllowProfileBasedPeeling = std::nullopt,
1430 std::optional ProvidedFullUnrollMaxCount = std::nullopt)
1431 : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1432 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1433 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1434 ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1435 ProvidedAllowPeeling(AllowPeeling),
1436 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1437 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1439 }
1440
1441 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1442 if (skipLoop(L))
1443 return false;
1444
1445 Function &F = *L->getHeader()->getParent();
1446
1447 auto &DT = getAnalysis().getDomTree();
1448 LoopInfo *LI = &getAnalysis().getLoopInfo();
1449 ScalarEvolution &SE = getAnalysis().getSE();
1450 const TargetTransformInfo &TTI =
1451 getAnalysis().getTTI(F);
1452 auto &AC = getAnalysis().getAssumptionCache(F);
1453
1454
1455
1456 OptimizationRemarkEmitter ORE(&F);
1457 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1458
1460 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1461 false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1462 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1463 ProvidedUpperBound, ProvidedAllowPeeling,
1464 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
1465
1466 if (Result == LoopUnrollResult::FullyUnrolled)
1468
1469 return Result != LoopUnrollResult::Unmodified;
1470 }
1471
1472
1473
1474 void getAnalysisUsage(AnalysisUsage &AU) const override {
1475 AU.addRequired();
1476 AU.addRequired();
1477
1478
1480 }
1481};
1482
1483}
1484
1485char LoopUnroll::ID = 0;
1486
1491INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1492
1494 bool ForgetAllSCEV, int Threshold, int Count,
1495 int AllowPartial, int Runtime, int UpperBound,
1496 int AllowPeeling) {
1497
1498
1499
1500 return new LoopUnroll(
1501 OptLevel, OnlyWhenForced, ForgetAllSCEV,
1502 Threshold == -1 ? std::nullopt : std::optional(Threshold),
1503 Count == -1 ? std::nullopt : std::optional(Count),
1504 AllowPartial == -1 ? std::nullopt : std::optional(AllowPartial),
1505 Runtime == -1 ? std::nullopt : std::optional(Runtime),
1506 UpperBound == -1 ? std::nullopt : std::optional(UpperBound),
1507 AllowPeeling == -1 ? std::nullopt : std::optional(AllowPeeling));
1508}
1509
1513
1514
1515
1517
1518
1519
1520 Loop *ParentL = L.getParentLoop();
1522 if (ParentL)
1524 else
1526
1527 std::string LoopName = std::string(L.getName());
1528
1531 nullptr, nullptr,
1532 true, OptLevel, true,
1533 OnlyWhenForced, ForgetSCEV, std::nullopt,
1534 std::nullopt, false,
1535 false, false,
1536 true,
1537 false,
1538 std::nullopt) !=
1542
1543
1544#ifndef NDEBUG
1545 if (ParentL)
1547#endif
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565 bool IsCurrentLoopValid = false;
1567 if (ParentL)
1569 else
1572 if (SibLoop == &L) {
1573 IsCurrentLoopValid = true;
1574 return true;
1575 }
1576
1577
1578 return OldLoops.contains(SibLoop);
1579 });
1581
1582 if (!IsCurrentLoopValid) {
1584 } else {
1585
1587
1590 }
1591 }
1592
1594}
1595
1599
1600
1609
1611 if (auto *LAMProxy = AM.getCachedResult(F))
1612 LAM = &LAMProxy->getManager();
1613
1617 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1619
1621
1622
1623
1624
1625
1626
1627 for (const auto &L : LI) {
1629 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false );
1631 }
1632
1633
1634
1637
1638 while (!Worklist.empty()) {
1639
1640
1641
1642
1644#ifndef NDEBUG
1645 Loop *ParentL = L.getParentLoop();
1646#endif
1647
1648
1649
1650
1651 std::optional LocalAllowPeeling = UnrollOpts.AllowPeeling;
1652 if (PSI && PSI->hasHugeWorkingSetSize())
1653 LocalAllowPeeling = false;
1654 std::string LoopName = std::string(L.getName());
1655
1656
1658 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1659 true, UnrollOpts.OptLevel, false,
1660 UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
1661 std::nullopt,
1662 std::nullopt, UnrollOpts.AllowPartial,
1663 UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1664 UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
1665 &AA);
1667
1668
1669#ifndef NDEBUG
1672#endif
1673
1674
1676 LAM->clear(L, LoopName);
1677 }
1678
1681
1683}
1684
1688 OS, MapClassName2PassName);
1689 OS << '<';
1690 if (UnrollOpts.AllowPartial != std::nullopt)
1691 OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
1692 if (UnrollOpts.AllowPeeling != std::nullopt)
1693 OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";
1694 if (UnrollOpts.AllowRuntime != std::nullopt)
1695 OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";
1696 if (UnrollOpts.AllowUpperBound != std::nullopt)
1697 OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";
1698 if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1699 OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
1700 << "profile-peeling;";
1701 if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
1702 OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';
1703 OS << 'O' << UnrollOpts.OptLevel;
1704 OS << '>';
1705}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
static bool hasUnrollFullPragma(const Loop *L)
Definition LoopUnrollPass.cpp:750
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static unsigned unrollCountPragmaValue(const Loop *L)
Definition LoopUnrollPass.cpp:767
static bool hasUnrollEnablePragma(const Loop *L)
Definition LoopUnrollPass.cpp:756
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition LoopUnrollPass.cpp:838
static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
Definition LoopUnrollPass.cpp:357
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition LoopUnrollPass.cpp:869
static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
Definition LoopUnrollPass.cpp:184
static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
Definition LoopUnrollPass.cpp:798
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)
Definition LoopUnrollPass.cpp:1158
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
Definition LoopUnrollPass.cpp:761
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Definition LoopUnrollPass.cpp:785
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability"))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
A manager for alias analyses.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
This is the shared class of boolean and integer constants.
This is an important base class in LLVM.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
void markLoopAsDeleted(Loop &L)
Analysis pass that exposes the LoopInfo for a function.
void verifyLoop() const
Verify loop structure.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition LoopUnrollPass.cpp:1510
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition LoopUnrollPass.cpp:1596
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition LoopUnrollPass.cpp:1685
Represents a single loop in the control flow graph.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
bool empty() const
Determine if the PriorityWorklist is empty or not.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Produce an estimate of the unrolled cost of the specified loop.
ConvergenceKind Convergence
bool ConvergenceAllowsRuntime
LLVM_ABI uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const
Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.
Definition LoopUnrollPass.cpp:729
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
Definition LoopUnrollPass.cpp:710
unsigned NumInlineCandidates
LLVM_ABI UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
Definition LoopUnrollPass.cpp:684
uint64_t getRolledLoopSize() const
void visit(Iterator Start, Iterator End)
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
iterator find(const_arg_type_t< ValueT > V)
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Abstract Attribute helper functions.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
LLVM_ABI void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, AAResults *AA=nullptr)
Perform some cleanup and simplifications on loops after unrolling.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
@ Runtime
Detect stack use after return if not disabled at runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Definition LoopUnrollPass.cpp:1493
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
LLVM_ABI void initializeLoopUnrollPass(PassRegistry &)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
LLVM_ABI TransformationMode hasUnrollAndJamTransformation(const Loop *L)
cl::opt< bool > ForgetSCEVInLoopUnroll
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
LLVM_TEMPLATE_ABI void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
FunctionAddr VTableAddr Count
LLVM_ABI TransformationMode hasUnrollTransformation(const Loop *L)
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition LoopUnrollPass.cpp:924
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
const char *const LLVMLoopUnrollFollowupAll
TransformationMode
The mode sets how eager a transformation should be applied.
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
@ TM_Disable
The transformation should not be applied.
@ TM_Enable
The transformation should be applied without considering a cost model.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition LoopUnrollPass.cpp:188
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
const char *const LLVMLoopUnrollFollowupRemainder
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
const char *const LLVMLoopUnrollFollowupUnrolled
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
LLVM_ABI LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
A CRTP mix-in to automatically provide informational APIs needed for passes.
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling based on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool RuntimeUnrollMultiExit
Allow runtime unrolling multi-exit loops.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
bool AddAdditionalAccumulators
Allow unrolling to add parallel reduction phis.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
const Instruction * Heart
bool RuntimeUnrollMultiExit
bool AllowExpensiveTripCount
bool AddAdditionalAccumulators
unsigned SCEVExpansionBudget