LLVM: lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
43#include
44#include
45
46namespace llvm {
49}
50
51using namespace llvm;
52
53#define DEBUG_TYPE "loop-unroll-and-jam"
54
55
56
58 "llvm.loop.unroll_and_jam.followup_all";
60 "llvm.loop.unroll_and_jam.followup_inner";
62 "llvm.loop.unroll_and_jam.followup_outer";
64 "llvm.loop.unroll_and_jam.followup_remainder_inner";
66 "llvm.loop.unroll_and_jam.followup_remainder_outer";
67
68
71 cl::desc("Allows loops to be unroll-and-jammed."));
72
75 cl::desc("Use this unroll count for all loops including those with "
76 "unroll_and_jam_count pragma values, for testing purposes"));
77
80 cl::desc("Threshold to use for inner loop when doing unroll and jam."));
81
84 cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
85 "unroll_count pragma."));
86
87
88
89
91 if (MDNode *LoopID = L->getLoopID())
93 return nullptr;
94}
95
96
97
99 if (MDNode *LoopID = L->getLoopID()) {
100
101 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
102 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
103
104 for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
106 if (!MD)
107 continue;
108
110 if (!S)
111 continue;
112
114 return true;
115 }
116 }
117 return false;
118}
119
120
124
125
126
129 if (MD) {
131 "Unroll count hint metadata should have two operands.");
134 assert(Count >= 1 && "Unroll count must be positive.");
136 }
137 return 0;
138}
139
140
144 assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
146}
147
148
149
156 unsigned InnerTripCount, unsigned InnerLoopSize,
160
161
162
163
164
165 unsigned MaxTripCount = 0;
166 bool UseUpperBound = false;
168 L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
169 false, OuterTripMultiple, OuterUCE, UP, PP,
170 UseUpperBound);
171 if (ExplicitUnroll || UseUpperBound) {
172
173
174 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
175 "computeUnrollCount\n");
177 return false;
178 }
179
180
181 bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
182 if (UserUnrollCount) {
189 return true;
190 }
191
192
194 if (PragmaCount > 0) {
195 UP.Count = PragmaCount;
198 if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
202 return true;
203 }
204
206 bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
207 bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
208
209
210
211 if (ExplicitUnrollAndJam)
213
216 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
217 "inner loop too large\n");
219 return false;
220 }
221
222
223
224
225 if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
229 }
230
231
232
233 if (ExplicitUnrollAndJam)
234 return true;
235
236
237
238 if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
239 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
240 "being left for the unroller\n");
242 return false;
243 }
244
245
246
247 if (SubLoop->getBlocks().size() != 1) {
249 dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
251 return false;
252 }
253
254
255
256
257 unsigned NumInvariant = 0;
261 Value *V = Ld->getPointerOperand();
264 NumInvariant++;
265 }
266 }
267 }
268 if (NumInvariant == 0) {
269 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
271 return false;
272 }
273
274 return false;
275}
276
283 L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt,
284 std::nullopt, std::nullopt, std::nullopt, std::nullopt);
287
293
298
301
303 << L->getHeader()->getParent()->getName() << "] Loop %"
304 << L->getHeader()->getName() << "\n");
305
306
307
308
309
314 }
315
317 LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
319 }
320
321
324 Loop *SubLoop = L->getSubLoops()[0];
327
329 LLVM_DEBUG(dbgs() << " Loop not considered unrollable\n");
331 }
332
335 << "\n");
336 LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
337
339 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
341 }
342
343
347 dbgs() << " Not unrolling loop with convergent instructions.\n");
349 }
350
351
352 MDNode *OrigOuterLoopID = L->getLoopID();
354
355
356
357
361 if (NewInnerEpilogueLoopID)
362 SubLoop->setLoopID(*NewInnerEpilogueLoopID);
363
364
365 BasicBlock *Latch = L->getLoopLatch();
370
371
373 L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
374 OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP);
375 if (UP.Count <= 1)
377
378 if (OuterTripCount && UP.Count > OuterTripCount)
379 UP.Count = OuterTripCount;
380
381 Loop *EpilogueOuterLoop = nullptr;
384 &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
385
386
387 if (EpilogueOuterLoop) {
391 if (NewOuterEpilogueLoopID)
392 EpilogueOuterLoop->setLoopID(*NewOuterEpilogueLoopID);
393 }
394
395 std::optional<MDNode *> NewInnerLoopID =
398 if (NewInnerLoopID)
399 SubLoop->setLoopID(*NewInnerLoopID);
400 else
401 SubLoop->setLoopID(OrigSubLoopID);
402
405 OrigOuterLoopID,
407 if (NewOuterLoopID) {
408 L->setLoopID(*NewOuterLoopID);
409
410
411 return UnrollResult;
412 }
413 }
414
415
416
418 L->setLoopAlreadyUnrolled();
419
420 return UnrollResult;
421}
422
428 LPMUpdater &U, bool &AnyLoopRemoved) {
429 bool DidSomething = false;
432
433
434
437 while (!Worklist.empty()) {
439 std::string LoopName = std::string(L->getName());
443 DidSomething = true;
445 if (L == OutmostLoop)
446 U.markLoopAsDeleted(*L, LoopName);
447 AnyLoopRemoved = true;
448 }
449 }
450
451 return DidSomething;
452}
453
459
462
463 bool AnyLoopRemoved = false;
465 OptLevel, U, AnyLoopRemoved))
467
469 if (!AnyLoopRemoved)
471 return PA;
472}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static const char *const LLVMLoopUnrollAndJamFollowupInner
Definition LoopUnrollAndJamPass.cpp:59
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
Definition LoopUnrollAndJamPass.cpp:63
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
Definition LoopUnrollAndJamPass.cpp:65
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition LoopUnrollAndJamPass.cpp:90
static const char *const LLVMLoopUnrollAndJamFollowupOuter
Definition LoopUnrollAndJamPass.cpp:61
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
Definition LoopUnrollAndJamPass.cpp:150
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
Definition LoopUnrollAndJamPass.cpp:142
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
Definition LoopUnrollAndJamPass.cpp:278
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
Definition LoopUnrollAndJamPass.cpp:98
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
Definition LoopUnrollAndJamPass.cpp:127
static bool hasUnrollAndJamEnablePragma(const Loop *L)
Definition LoopUnrollAndJamPass.cpp:121
static const char *const LLVMLoopUnrollAndJamFollowupAll
Definition LoopUnrollAndJamPass.cpp:57
This file provides a priority worklist.
This file defines the SmallPtrSet class.
This pass exposes codegen information to IR-level passes.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
DependenceInfo - This class is the main dependence-analysis driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This class provides an interface for updating the loop pass manager based on mutations to the loop nest.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
This analysis provides information for a loop nest.
This class represents a loop nest and can be used to query its properties.
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition LoopUnrollAndJamPass.cpp:454
Represents a single loop in the control flow graph.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
bool empty() const
Determine if the PriorityWorklist is empty or not.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value, if possible.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
A version of PriorityWorklist that selects small size optimized data structures for the vector and map.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Produce an estimate of the unrolled cost of the specified loop.
ConvergenceKind Convergence
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
unsigned NumInlineCandidates
uint64_t getRolledLoopSize() const
LLVM Value Representation.
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI TransformationMode hasUnrollAndJamTransformation(const Loop *L)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_TEMPLATE_ABI void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
FunctionAddr VTableAddr Count
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
TransformationMode
The mode sets how eager a transformation should be applied.
@ TM_ForcedByUser
The transformation was directed by the user, e.g. by a #pragma in the source code.
@ TM_Disable
The transformation should not be applied.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user specified parameters.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (for example, "llvm.loop.unroll.count").
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loop body).
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).