LLVM: lib/Transforms/Vectorize/LoopVectorizationPlanner.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
49
52
53
57
58
/// Insert \p R into BB at InsertPt when an insertion block is set; when BB is
/// null the recipe is left unattached. \p R is returned unchanged in both
/// cases, so the call can wrap a `new` expression directly.
/// The template parameter list is restored here: the page extraction had
/// stripped `<typename T>`, leaving the ill-formed `template T *...`.
59 template <typename T> T *tryInsertInstruction(T *R) {
60 if (BB)
61 BB->insert(R, InsertPt);
62 return R;
63 }
64
68 const Twine &Name = "") {
69 return tryInsertInstruction(
71 }
72
73public:
80
81
82
87
90
91
94 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
95 return B;
96 }
97
98
102
103 public:
104
106
107
109 : Block(InsertBlock), Point(InsertPoint) {}
110
111
/// Returns true if this insert point is set, i.e. its Block is non-null.
112 bool isSet() const { return Block != nullptr; }
113
116 };
117
118
125
126
127
129 assert(TheBB && "Attempting to set a null insert point");
130 BB = TheBB;
131 InsertPt = BB->end();
132 }
133
134
135
137 BB = TheBB;
138 InsertPt = IP;
139 }
140
141
142
147
148
150
151
152
158 const Twine &Name = "") {
159 VPInstruction *NewVPInst = tryInsertInstruction(
160 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
161 NewVPInst->setUnderlyingValue(Inst);
162 return NewVPInst;
163 }
166 return createInstruction(Opcode, Operands, {}, DL, Name);
167 }
171 const Twine &Name = "") {
172 return tryInsertInstruction(
173 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
174 }
175
179 const Twine &Name = "") {
181 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
182 }
183
188 return tryInsertInstruction(
189 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
190 }
191
194 const Twine &Name = "") {
196 }
197
200 const Twine &Name = "") {
201 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
202 Name);
203 }
204
207 const Twine &Name = "") {
208
210 Instruction::BinaryOps::Or, {LHS, RHS},
212 }
213
216 const Twine &Name = "") {
218 }
219
223 std::optional<FastMathFlags> FMFs = std::nullopt) {
224 if (!FMFs)
225 return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,
226 Name);
228 Instruction::Select, {Cond, TrueVal, FalseVal}, *FMFs, {}, DL, Name));
229 }
230
231
232
235 const Twine &Name = "") {
238 return tryInsertInstruction(
239 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
240 }
241
242
243
246 const Twine &Name = "") {
249 return tryInsertInstruction(
250 new VPInstruction(Instruction::FCmp, {A, B}, Pred, {}, DL, Name));
251 }
252
255 const Twine &Name = "") {
256 return tryInsertInstruction(
259 }
260
264 const Twine &Name = "") {
267 }
268
271 const Twine &Name = "") {
272 return tryInsertInstruction(
275 }
276
278 const Twine &Name = "") {
279 return tryInsertInstruction(new VPPhi(IncomingValues, DL, Name));
280 }
281
285 if (EC.isScalable()) {
287 RuntimeEC = EC.getKnownMinValue() == 1
288 ? VScale
290 {VScale, RuntimeEC}, {true, false});
291 }
292 return RuntimeEC;
293 }
294
295
296
297
301 const Twine &Name = "") {
302 return tryInsertInstruction(
303 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
304 }
305
310 return tryInsertInstruction(
311 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
312 }
313
316 if (ResultTy == SrcTy)
317 return Op;
320 ? Instruction::Trunc
321 : Instruction::ZExt;
323 }
324
326 Type *ResultTy) {
328 if (Opcode == Instruction::Trunc)
330 else if (Opcode == Instruction::ZExt)
332 return tryInsertInstruction(
334 }
335
341 IV, Step, VF, InductionOpcode,
343 }
344
348
349
350
351
352
353
354
359
360 public:
363
366
368 };
369};
370
371
372
373
374
375
376
378
380
381
383
384
386
387
388
390
394
395
399
403
405 return !(*this == rhs);
406 }
407};
408
409
410
411
412
413
417
428 "Invalid scalable properties");
429 }
430
432
433
435
436
438};
439
440
441
443
444 Loop *OrigLoop;
445
446
448
449
451
452
454
455
457
458
460
461
463
464
466
468
470
472
474
475
477
478
480
481
482
483
484
485
486
487
488
490
491
492
493
496
497public:
504 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
505 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
506
507
508
509
511
512
513
515
516
517
519
520
521
523
524
525
526
527
530
531
532
533
534
535
536
537
538
539
540
545 bool VectorizingEpilogue);
546
547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
549#endif
550
551
552
554 return any_of(VPlans,
555 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
556 }
557
558
559
560
561 static bool
564
565
566
567
570
571
573
574
575
578
579
580
581
582
583
584
585
586
589 bool VectorizingEpilogue, MDNode *OrigLoopID,
590 std::optional<unsigned> OrigAverageTripCount,
591 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
592 bool DisableRuntimeUnroll);
593
594protected:
595
596
597
599
600private:
601
602
603
604
605
607
608
609
610
611
612
613
614
617
618
619
620
622
623
624
625
626
627
628 void adjustRecipesForReductions(VPlanPtr &Plan,
631
632
633 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
634 bool HasBranchWeights) const;
635
636#ifndef NDEBUG
637
638
639
640
641
643#endif
644
645
646
649 bool IsEpilogue = false) const;
650
651
652
655 const unsigned MaxTripCount, bool HasTail,
656 bool IsEpilogue = false) const;
657
658
659
660 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
661};
662
663}
664
665#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
static DebugLoc getUnknown()
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getFixed(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC)
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
Definition LoopVectorizationPlanner.h:498
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
VectorizationFactor computeBestVF()
Compute and return the most profitable vectorization factor.
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Definition LoopVectorizationPlanner.h:553
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This class represents an analyzed expression in the program.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
RecipeListTy::iterator iterator
Instruction iterators...
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard(VPBuilder &B)
Definition LoopVectorizationPlanner.h:361
InsertPointGuard & operator=(const InsertPointGuard &)=delete
~InsertPointGuard()
Definition LoopVectorizationPlanner.h:367
InsertPoint - A saved insertion point.
Definition LoopVectorizationPlanner.h:99
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
Definition LoopVectorizationPlanner.h:108
VPBasicBlock * getBlock() const
Definition LoopVectorizationPlanner.h:114
VPBasicBlock::iterator getPoint() const
Definition LoopVectorizationPlanner.h:115
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
Definition LoopVectorizationPlanner.h:112
VPlan-based builder utility analogous to IRBuilder.
Definition LoopVectorizationPlanner.h:54
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:205
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
Definition LoopVectorizationPlanner.h:314
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
Definition LoopVectorizationPlanner.h:136
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
Definition LoopVectorizationPlanner.h:143
VPValue * createElementCount(Type *Ty, ElementCount EC)
Definition LoopVectorizationPlanner.h:282
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition LoopVectorizationPlanner.h:119
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
Definition LoopVectorizationPlanner.h:306
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:192
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:214
VPBasicBlock * getInsertBlock() const
Definition LoopVectorizationPlanner.h:88
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
Definition LoopVectorizationPlanner.h:298
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
Definition LoopVectorizationPlanner.h:149
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
Definition LoopVectorizationPlanner.h:221
VPBasicBlock::iterator getInsertPoint() const
Definition LoopVectorizationPlanner.h:89
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
Definition LoopVectorizationPlanner.h:337
VPBuilder(VPBasicBlock *InsertBB)
Definition LoopVectorizationPlanner.h:75
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:261
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
Definition LoopVectorizationPlanner.h:244
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:253
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
Definition LoopVectorizationPlanner.h:92
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition LoopVectorizationPlanner.h:164
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:184
VPBuilder(VPRecipeBase *InsertPt)
Definition LoopVectorizationPlanner.h:76
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
Definition LoopVectorizationPlanner.h:277
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition LoopVectorizationPlanner.h:325
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
Definition LoopVectorizationPlanner.h:233
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Definition LoopVectorizationPlanner.h:83
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:198
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:176
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:168
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
Definition LoopVectorizationPlanner.h:345
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
Definition LoopVectorizationPlanner.h:77
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition LoopVectorizationPlanner.h:269
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Definition LoopVectorizationPlanner.h:128
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Definition LoopVectorizationPlanner.h:153
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Recipe to expand a SCEV expression.
Class to record and manage LLVM IR flags.
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
This is a concrete Recipe that models a single VPlan-level instruction.
@ VScale
Returns the value for vscale.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
VPWidenCastRecipe is a recipe to create vector cast instructions.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This is an optimization pass for GlobalISel generic memory operations.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > ForceTargetInstructionCost
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
ElementCount FixedVF
Definition LoopVectorizationPlanner.h:415
ElementCount ScalableVF
Definition LoopVectorizationPlanner.h:416
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
Definition LoopVectorizationPlanner.h:424
FixedScalableVFPair(const ElementCount &Max)
Definition LoopVectorizationPlanner.h:421
bool hasVector() const
Definition LoopVectorizationPlanner.h:437
static FixedScalableVFPair getNone()
Definition LoopVectorizationPlanner.h:431
FixedScalableVFPair()
Definition LoopVectorizationPlanner.h:418
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition LoopVectorizationPlanner.h:377
InstructionCost Cost
Cost of the loop with that width.
Definition LoopVectorizationPlanner.h:382
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable.
Definition LoopVectorizationPlanner.h:389
bool operator==(const VectorizationFactor &rhs) const
Definition LoopVectorizationPlanner.h:400
ElementCount Width
Vector width with best cost.
Definition LoopVectorizationPlanner.h:379
InstructionCost ScalarCost
Cost of the scalar loop.
Definition LoopVectorizationPlanner.h:385
bool operator!=(const VectorizationFactor &rhs) const
Definition LoopVectorizationPlanner.h:404
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
Definition LoopVectorizationPlanner.h:396
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)
Definition LoopVectorizationPlanner.h:391