LLVM: lib/Transforms/Vectorize/VPlanUnroll.cpp Source File
//===-- VPlanUnroll.cpp - VPlan unrolling utilities and transforms --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements explicit unrolling for VPlans.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

namespace {

/// Helper to hold state needed for unrolling a VPlan by UF. The original
/// recipes model part 0; the copies created for parts 1..UF-1 are tracked in
/// VPV2Parts so that operands can be remapped to the matching part.
class UnrollState {
  /// Plan to unroll.
  VPlan &Plan;
  /// Unroll factor to unroll by.
  const unsigned UF;
  /// Analysis for types.
  VPTypeAnalysis TypeInfo;

  /// Recipes created during unrolling that must not be unrolled again.
  SmallPtrSet<VPRecipeBase *, 8> ToSkip;

  /// Associate with each VPValue of part 0 its unrolled instances of parts 1,
  /// ..., UF-1.
  DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;

  /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
  void unrollReplicateRegionByUF(VPRegionBlock *VPR);

  /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform
  /// across all parts.
  void unrollRecipeByUF(VPRecipeBase &R);

  /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
  /// depends on the concrete kind of header phi. Newly created recipes are
  /// inserted at \p InsertPtForPhi.
  void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                           VPBasicBlock::iterator InsertPtForPhi);

  /// Unroll a widen induction recipe \p IV, adding recipes to compute the
  /// induction value for each part.
  void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
                                VPBasicBlock::iterator InsertPtForPhi);

  VPValue *getConstantVPV(unsigned Part) {
    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
    return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
  }

public:
  UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
      : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}

  void unrollBlock(VPBlockBase *VPB);

  VPValue *getValueForPart(VPValue *V, unsigned Part) {
    if (Part == 0 || V->isLiveIn())
      return V;
    assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
           "accessed value does not exist");
    return VPV2Parts[V][Part - 1];
  }

  /// Given the original recipe \p OrigR (for part 0) and its copy \p CopyR for
  /// part \p Part, map every VPValue defined by \p OrigR to the corresponding
  /// VPValue defined by \p CopyR.
  void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
                        unsigned Part) {
    for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
      auto Ins = VPV2Parts.insert({VPV, {}});
      assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
      Ins.first->second.push_back(CopyR->getVPValue(Idx));
    }
  }

  /// Add a recipe \p R that is uniform across all parts: every part maps back
  /// to \p R itself.
  void addUniformForAllParts(VPSingleDefRecipe *R) {
    auto Ins = VPV2Parts.insert({R, {}});
    assert(Ins.second && "uniform value already added");
    for (unsigned Part = 0; Part != UF; ++Part)
      Ins.first->second.push_back(R);
  }

  bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }

  /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
  /// \p Part.
  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
    auto *Op = R->getOperand(OpIdx);
    R->setOperand(OpIdx, getValueForPart(Op, Part));
  }

  /// Update all of \p R's operands with their corresponding VPValues for part
  /// \p Part.
  void remapOperands(VPRecipeBase *R, unsigned Part) {
    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
      R->setOperand(OpIdx, getValueForPart(Op, Part));
  }
};
} // namespace

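// Illustrative note (not part of the original file): with UF = 3, after
// addRecipeForPart(R, C1, 1) and addRecipeForPart(R, C2, 2) have been called
// for a value V defined by R, getValueForPart(V, 0) returns V itself, while
// getValueForPart(V, 1) and getValueForPart(V, 2) return the corresponding
// values defined by C1 and C2. The definitions below rely on this mapping.
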
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
  for (unsigned Part = 1; Part != UF; ++Part) {
    auto *Copy = VPR->clone();
    VPBlockUtils::insertBlockBefore(Copy, InsertPt);

    auto PartI = vp_depth_first_shallow(Copy->getEntry());
    auto Part0 = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[PartIVPBB, Part0VPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
             VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
      for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
        remapOperands(&PartIR, Part);
        if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
          ScalarIVSteps->addOperand(getConstantVPV(Part));
        }

        addRecipeForPart(&Part0R, &PartIR, Part);
      }
    }
  }
}
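
// Illustrative note (not part of the original file): for UF = 2, a predicated
// replicate region is cloned once and the clone is wired in directly before
// the original region's single successor; the clone's recipes are remapped to
// part 1 and any VPScalarIVStepsRecipe in it gets the constant 1 appended as
// an extra operand identifying its part.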

void UnrollState::unrollWidenInductionByUF(
    VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
  VPBasicBlock *PH = cast<VPBasicBlock>(
      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
  Type *IVTy = TypeInfo.inferScalarType(IV);
  auto &ID = IV->getInductionDescriptor();
  std::optional<FastMathFlags> FMFs;
  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
    FMFs = ID.getInductionBinOp()->getFastMathFlags();

  VPValue *VectorStep = &Plan.getVF();
  VPBuilder Builder(PH);
  if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
    Instruction::CastOps CastOp =
        IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;
    VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
    ToSkip.insert(VectorStep->getDefiningRecipe());
  }

  VPValue *ScalarStep = IV->getStepValue();
  auto *ConstStep = ScalarStep->isLiveIn()
                        ? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
                        : nullptr;
  if (!ConstStep || ConstStep->getValue() != 1) {
    if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
      ScalarStep =
          Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
      ToSkip.insert(ScalarStep->getDefiningRecipe());
    }

    unsigned MulOpc =
        IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;
    VPInstruction *Mul = Builder.createNaryOp(MulOpc, {VectorStep, ScalarStep},
                                              FMFs, IV->getDebugLoc());
    VectorStep = Mul;
    ToSkip.insert(Mul);
  }

  // Create recipes computing the induction value for parts 1 .. UF-1: part 0
  // remains the original header phi and each later part adds VectorStep to the
  // previous part. The phi also receives the per-part step and the last part
  // as new operands, used to compute its backedge value, e.g. for UF = 4:
  //
  //   %iv         = WIDEN-INDUCTION %start, %scalar.step, %VectorStep, %step.add.3
  //   %step.add   = %iv + %VectorStep
  //   %step.add.2 = %step.add + %VectorStep
  //   %step.add.3 = %step.add.2 + %VectorStep
  //
  // The newly created recipes are added to ToSkip so they are not unrolled
  // again.
  VPValue *Prev = IV;
  Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
  unsigned AddOpc =
      IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
  for (unsigned Part = 1; Part != UF; ++Part) {
    std::string Name =
        Part > 1 ? "step.add." + std::to_string(Part) : "step.add";

    VPInstruction *Add = Builder.createNaryOp(AddOpc,
                                              {
                                                  Prev,
                                                  VectorStep,
                                              },
                                              FMFs, IV->getDebugLoc(), Name);
    ToSkip.insert(Add);
    addRecipeForPart(IV, Add, Part);
    Prev = Add;
  }
  IV->addOperand(VectorStep);
  IV->addOperand(Prev);
}

void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                                      VPBasicBlock::iterator InsertPtForPhi) {
  // First-order recurrences pass a single vector or scalar through their
  // header phis, irrespective of interleaving.
  if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
    return;

  // Generate step vectors for each unrolled part.
  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
    unrollWidenInductionByUF(IV, InsertPtForPhi);
    return;
  }

  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
  if (RdxPhi && RdxPhi->isOrdered())
    return;

  auto InsertPt = std::next(R->getIterator());
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R->clone();
    Copy->insertBefore(*R->getParent(), InsertPt);
    addRecipeForPart(R, Copy, Part);
    if (isa<VPWidenPointerInductionRecipe>(R)) {
      Copy->addOperand(R);
      Copy->addOperand(getConstantVPV(Part));
    } else if (RdxPhi) {
      Copy->addOperand(getConstantVPV(Part));
    } else {
      assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
             "unexpected header phi recipe not needing unrolled part");
    }
  }
}
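
// Illustrative note (not part of the original file): an unordered reduction
// phi cloned for part 1 receives the constant 1 as an extra operand
// identifying its part, while a pointer-induction phi clone additionally
// references the part-0 phi itself.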

/// Handle non-header-phi recipes.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
  using namespace llvm::VPlanPatternMatch;
  if (match(&R, m_BranchOnCond(m_VPValue())) ||
      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
    return;

  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
    if (vputils::onlyFirstPartUsed(VPI)) {
      addUniformForAllParts(VPI);
      return;
    }
  }
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
        RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
      // Stores to an invariant address only need to store the last part.
      remapOperands(&R, UF - 1);
      return;
    }
    if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
      if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
        addUniformForAllParts(RepR);
        return;
      }
    }
  }

  // Unroll non-uniform recipes by cloning them once per remaining part.
  auto InsertPt = std::next(R.getIterator());
  VPBasicBlock &VPBB = *R.getParent();
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R.clone();
    Copy->insertBefore(VPBB, InsertPt);
    addRecipeForPart(&R, Copy, Part);

    VPValue *Op;
    if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                      m_VPValue(), m_VPValue(Op)))) {
      Copy->setOperand(0, getValueForPart(Op, Part - 1));
      Copy->setOperand(1, getValueForPart(Op, Part));
      continue;
    }
    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
      auto *Phi = cast<VPReductionPHIRecipe>(R.getOperand(0));
      if (Phi->isOrdered()) {
        auto &Parts = VPV2Parts[Phi];
        if (Part == 1) {
          Parts.clear();
          Parts.push_back(Red);
        }
        Parts.push_back(Copy->getVPSingleValue());
        Phi->setOperand(1, Copy->getVPSingleValue());
      }
    }
    remapOperands(Copy, Part);

    // Add operand indicating the part to generate code for, to recipes still
    // requiring it.
    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
            VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
        match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                        m_VPValue())))
      Copy->addOperand(getConstantVPV(Part));

    if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
      Copy->setOperand(0, R.getOperand(0));
  }
}
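
// Illustrative note (not part of the original file): for UF = 2 a widened
// memory recipe is cloned once and the clone's operands are remapped to their
// part-1 values; vector-pointer recipes additionally get the part number
// appended as an operand while keeping the part-0 pointer as operand 0.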

using namespace llvm::VPlanPatternMatch;
void UnrollState::unrollBlock(VPBlockBase *VPB) {
  auto *VPR = dyn_cast<VPRegionBlock>(VPB);
  if (VPR) {
    if (VPR->isReplicator())
      return unrollReplicateRegionByUF(VPR);

    // Traverse blocks in the region in RPO to ensure defs are visited before
    // uses across blocks.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(VPR->getEntry());
    for (VPBlockBase *VPB : RPOT)
      unrollBlock(VPB);
    return;
  }

  // VPB is a VPBasicBlock; unroll its recipes.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
      continue;

    // Add all VPValues for all parts to ComputeReductionResult, which combines
    // the parts to compute the final reduction value.
    VPValue *Op1;
    if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                      m_VPValue(), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    VPValue *Op0;
    if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>(
                      m_VPValue(Op0), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      if (Plan.hasScalarVFOnly()) {
        // Extracting from the end with VF = 1 implies retrieving the scalar
        // part UF - Op1.
        unsigned Offset =
            cast<ConstantInt>(Op1->getLiveInIRValue())->getZExtValue();
        R.getVPSingleValue()->replaceAllUsesWith(
            getValueForPart(Op0, UF - Offset));
        R.eraseFromParent();
      } else {
        // Otherwise extract from the last part.
        remapOperands(&R, UF - 1);
      }
      continue;
    }

    auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
    if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
      unrollHeaderPHIByUF(H, InsertPtForPhi);
      continue;
    }

    unrollRecipeByUF(R);
  }
}
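
// Illustrative note (not part of the original file): with UF = 4 and a
// scalar-only VF, an ExtractFromEnd with offset 1 is folded away above by
// replacing its users with the part-3 value of its first operand; with a
// vector VF it simply extracts from the last unrolled part.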

void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);
  auto Cleanup = make_scope_exit([&Plan]() {
    auto Iter = vp_depth_first_deep(Plan.getEntry());
    // Remove recipes that are redundant after unrolling.
    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *VPI = dyn_cast<VPInstruction>(&R);
        if (VPI &&
            VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
            VPI->getNumOperands() == 1) {
          VPI->replaceAllUsesWith(VPI->getOperand(0));
          VPI->eraseFromParent();
        }
      }
    }
  });
  if (UF == 1) {
    return;
  }

  UnrollState Unroller(Plan, UF, Ctx);

  // Iterate over all blocks in the plan starting from Entry, and unroll
  // recipes inside them. This includes blocks outside the vector loop region,
  // which may also contain recipes with per-part operands.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());
  for (VPBlockBase *VPB : RPOT)
    Unroller.unrollBlock(VPB);

  unsigned Part = 1;
  // Remap operands of cloned header phis to update backedge values. The header
  // phis cloned during unrolling are placed just after the header phi for part
  // 0; reset the part counter to 1 when reaching an original (part 0) phi.
  for (VPRecipeBase &H :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // The second operand of fixed-order recurrence phis, feeding the spliced
    // value across the backedge, needs to remap to the last part of the
    // spliced value.
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
      Unroller.remapOperand(&H, 1, UF - 1);
      continue;
    }
    if (Unroller.contains(H.getVPSingleValue()) ||
        isa<VPWidenPointerInductionRecipe>(&H)) {
      Part = 1;
      continue;
    }
    Unroller.remapOperands(&H, Part);
    Part++;
  }

  VPlanTransforms::removeDeadRecipes(Plan);
}
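
// Illustrative sketch (not part of the original file): unrolling by UF = 2
// conceptually rewrites a vector loop body such as
//
//   %iv   = WIDEN-INDUCTION %start, %step
//   %ptr  = vector-pointer %base
//   %l    = WIDEN load %ptr
//
// into
//
//   %iv        = WIDEN-INDUCTION %start, %step, %VectorStep, %step.add
//   %step.add  = %iv + %VectorStep
//   %ptr       = vector-pointer %base
//   %ptr.1     = vector-pointer %base, 1
//   %l         = WIDEN load %ptr
//   %l.1       = WIDEN load %ptr.1
//
// where each clone's operands are remapped to its part's values via
// UnrollState::getValueForPart; the names above are for exposition only.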