LLVM: lib/Transforms/Vectorize/LoopVectorizationPlanner.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
30
31namespace llvm {
32
33class LoopInfo;
34class DominatorTree;
35class LoopVectorizationLegality;
36class LoopVectorizationCostModel;
37class PredicatedScalarEvolution;
38class LoopVectorizeHints;
39class OptimizationRemarkEmitter;
40class TargetTransformInfo;
41class TargetLibraryInfo;
42class VPRecipeBuilder;
43
44
48
49
50 template <class T> T *tryInsertInstruction(T *R) {
51 if (BB)
52 BB->insert(R, InsertPt);
53 return R;
54 }
55
60 }
61
63 std::initializer_list<VPValue *> Operands,
66 }
67
68public:
74 }
75
76
77
79 BB = nullptr;
81 }
82
85
86
89 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
90 return B;
91 }
92
93
97
98 public:
99
101
102
104 : Block(InsertBlock), Point(InsertPoint) {}
105
106
107 bool isSet() const { return Block != nullptr; }
108
111 };
112
113
117 else
119 }
120
121
122
124 assert(TheBB && "Attempting to set a null insert point");
125 BB = TheBB;
126 InsertPt = BB->end();
127 }
128
129
130
132 BB = TheBB;
133 InsertPt = IP;
134 }
135
136
137
141 }
142
143
144
149 if (Inst)
150 DL = Inst->getDebugLoc();
153 return NewVPInst;
154 }
157 return createInstruction(Opcode, Operands, DL, Name);
158 }
160 std::initializer_list<VPValue *> Operands,
161 std::optional<FastMathFlags> FMFs = {},
163 if (FMFs)
164 return tryInsertInstruction(
165 new VPInstruction(Opcode, Operands, *FMFs, DL, Name));
166 return createInstruction(Opcode, Operands, DL, Name);
167 }
168
170 std::initializer_list<VPValue *> Operands,
173 return tryInsertInstruction(
175 }
176
180 }
181
184 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
185 }
186
189
191 Instruction::BinaryOps::Or, {LHS, RHS},
192 VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
193 }
194
197 return tryInsertInstruction(
199 }
200
203 std::optional<FastMathFlags> FMFs = std::nullopt) {
205 FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
207 : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
209 return tryInsertInstruction(Select);
210 }
211
212
213
214
219 return tryInsertInstruction(
221 }
222
225 return tryInsertInstruction(
227 }
230 return tryInsertInstruction(
232 }
233
234
235
236
241 return tryInsertInstruction(
243 }
244
247 return tryInsertInstruction(
249 }
250
252 Type *ResultTy) {
253 return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
254 }
255
260 IV, Step, InductionOpcode,
262 }
263
264
265
266
267
268
269
274
275 public:
278
281
283 };
284};
285
286
287
288
289
290
291
293
295
296
298
299
301
302
303
305
309
310
313 }
314
317 }
318
320 return !(*this == rhs);
321 }
322};
323
324
325
326
327
328
332
338 }
343 "Invalid scalable properties");
344 }
345
347
348
350
351
353};
354
355
356
358
359 Loop *OrigLoop;
360
361
363
364
366
367
369
370
372
373
375
376
378
379
381
383
385
387
389
390
392
393
395
396
397
398
399
400
401
402
403
405
406
407
408
411
412public:
419 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
420 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
421
422
423
424
426
427
428
430
431
432
434
435
436
438
439
440
441
442
443
444
445
446
447
448
449
450
454 bool VectorizingEpilogue,
456
457#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
459#endif
460
461
462
464 return any_of(VPlans,
465 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
466 }
467
468
469
470
471 static bool
474
475
476
477
480
481
483
484protected:
485
486
487
489
490private:
491
492
493
495
496
497
498
499
500
501
503
504
505
506
508
509
510
511
512
513
514 void adjustRecipesForReductions(VPlanPtr &Plan,
517
518#ifndef NDEBUG
519
520
521
522
523
525#endif
526
527
528
531
532
533
536 const unsigned MaxTripCount) const;
537
538
539
540 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
541};
542
543}
544
545#endif
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
mir Rename Register Operands
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This class represents an Operation in the Expression.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getFixed(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Planner drives the vectorization process after having passed Legality checks.
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC)
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VFs between MinVF and MaxVF inclusive, according to the information gath...
VectorizationFactor computeBestVF()
Compute and return the most profitable vectorization factor.
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VFs.
void printPlans(raw_ostream &O)
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue, const DenseMap< const SCEV *, Value * > *ExpandedSCEVs=nullptr)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
RecipeListTy::iterator iterator
Instruction iterators...
void insert(VPRecipeBase *Recipe, iterator InsertPt)
RAII object that stores the current insertion point and restores it when the object is destroyed.
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard(VPBuilder &B)
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock * getBlock() const
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL={}, const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPValue * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL={}, const Twine &Name="")
VPValue * createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL={}, const Twine &Name="")
VPBuilder(VPBasicBlock *InsertBB)
VPScalarCastRecipe * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step)
VPBuilder(VPRecipeBase *InsertPt)
VPInstruction * createOverflowingOp(unsigned Opcode, std::initializer_list< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPValue * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPValue * createNot(VPValue *Operand, DebugLoc DL={}, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, std::initializer_list< VPValue * > Operands, std::optional< FastMathFlags > FMFs={}, DebugLoc DL={}, const Twine &Name="")
VPValue * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPValue * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL={}, const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for converting the input IV value to the corresponding value of an IV with different s...
This is a concrete Recipe that models a single VPlan-level instruction.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
Helper class to create VPRecipes from IR instructions.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
void setUnderlyingValue(Value *Val)
VPWidenCastRecipe is a recipe to create vector cast instructions.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This is an optimization pass for GlobalISel generic memory operations.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
std::unique_ptr< VPlan > VPlanPtr
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)