LLVM: lib/Transforms/Vectorize/VPlanSLP.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
33#include
34#include
35#include
36#include
37
38using namespace llvm;
39
40#define DEBUG_TYPE "vplan-slp"
41
42
44
46 Old2NewTy &Old2New,
51 visitBlock(Base, Old2New, IAI);
52 }
53}
54
55void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
58 for (VPRecipeBase &VPI : *VPBB) {
60 continue;
62 if (!VPInst)
63 continue;
65 if (!Inst)
66 continue;
68 if (!IG)
69 continue;
70
71 auto NewIGIter = Old2New.find(IG);
72 if (NewIGIter == Old2New.end())
73 Old2New[IG] = new InterleaveGroup(
74 IG->getFactor(), IG->isReverse(), IG->getAlign());
75
76 if (Inst == IG->getInsertPos())
77 Old2New[IG]->setInsertPos(VPInst);
78
79 InterleaveGroupMap[VPInst] = Old2New[IG];
80 InterleaveGroupMap[VPInst]->insertMember(
81 VPInst, IG->getIndex(Inst),
82 Align(IG->isReverse() ? (-1) * int(IG->getFactor())
83 : IG->getFactor()));
84 }
86 visitRegion(Region, Old2New, IAI);
87 } else {
89 }
90}
91
94 Old2NewTy Old2New;
96}
97
99
100
101 CompletelySLP = false;
102 return nullptr;
103}
104
108 })) {
109 unsigned BundleSize = 0;
110 for (VPValue *V : Operands) {
112 assert(->isVectorTy() && "Only scalar types supported for now");
113 BundleSize += T->getScalarSizeInBits();
114 }
115 WidestBundleBits = std::max(WidestBundleBits, BundleSize);
116 }
117
118 auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
120 "Already created a combined instruction for the operand bundle");
121 (void)Res;
122}
123
125
126 if ((Operands, [](VPValue *Op) {
129 })) {
130 LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
131 return false;
132 }
133
134
135
136
137
140 unsigned Opcode = OriginalInstr->getOpcode();
142 if ((Operands, [Opcode, Width](VPValue *Op) {
144 return I->getOpcode() == Opcode &&
145 I->getType()->getPrimitiveSizeInBits() == Width;
146 })) {
147 LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
148 return false;
149 }
150
151
152 if (any_of(Operands, [this](VPValue *Op) {
154 })) {
155 LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
156 return false;
157 }
158
160 [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
161 LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
162 return false;
163 }
164
165
166
167
168
169 if (Opcode == Instruction::Load) {
170 unsigned LoadsSeen = 0;
174 if (!VPI)
175 return false;
176 if (VPI->getOpcode() == Instruction::Load &&
178 LoadsSeen++;
179
180 if (LoadsSeen == Operands.size())
181 break;
182 if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
184 dbgs() << "VPSLP: instruction modifying memory between loads\n");
185 return false;
186 }
187 }
188
189 if ((Operands, [](VPValue *Op) {
191 ->isSimple();
192 })) {
193 LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
194 return false;
195 }
196 }
197
198 if (Opcode == Instruction::Store)
199 if ((Operands, [](VPValue *Op) {
201 ->isSimple();
202 })) {
203 LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
204 return false;
205 }
206
207 return true;
208}
209
211 unsigned OperandIndex) {
213 for (VPValue *V : Values) {
214
216 Operands.push_back(U->getOperand(OperandIndex));
217 }
218 return Operands;
219}
220
225
230
231 switch (VPI->getOpcode()) {
232 case Instruction::Load:
233 llvm_unreachable("Loads terminate a tree, no need to get operands");
234 case Instruction::Store:
235 Result.push_back(getOperands(Values, 0));
236 break;
237 default:
238 for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
240 break;
241 }
242
243 return Result;
244}
245
246
251 }))
252 return std::nullopt;
253 return {Opcode};
254}
255
256
257
260 if (A->getOpcode() != B->getOpcode())
261 return false;
262
263 if (A->getOpcode() != Instruction::Load &&
264 A->getOpcode() != Instruction::Store)
265 return true;
268
269 return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
270}
271
272
273
278
279 if (!I1 || !I2)
280 return 0;
281
282 if (MaxLevel == 0)
284
285 unsigned Score = 0;
286 for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
287 for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
288 Score +=
289 getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
290 return Score;
291}
292
293std::pair<VPlanSlp::OpMode, VPValue *>
297 assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
298 "Currently we only handle load and commutative opcodes");
300
304 for (auto *Candidate : Candidates) {
309 << " ");
310 BestCandidates.push_back(Candidate);
311 }
312 }
314
315 if (BestCandidates.empty())
316 return {OpMode::Failed, nullptr};
317
318 if (BestCandidates.size() == 1)
319 return {Mode, BestCandidates[0]};
320
321 VPValue *Best = nullptr;
322 unsigned BestScore = 0;
324 unsigned PrevScore = ~0u;
325 bool AllSame = true;
326
327
328 for (auto *Candidate : BestCandidates) {
330 if (PrevScore == ~0u)
331 PrevScore = Score;
332 if (PrevScore != Score)
333 AllSame = false;
334 PrevScore = Score;
335
336 if (Score > BestScore) {
337 BestScore = Score;
338 Best = Candidate;
339 }
340 }
341 if (!AllSame)
342 break;
343 }
346 << "\n");
347 Candidates.erase(Best);
348
349 return {Mode, Best};
350}
351
353 SmallVector<MultiNodeOpTy, 4> FinalOrder;
355 FinalOrder.reserve(MultiNodeOps.size());
356 Mode.reserve(MultiNodeOps.size());
357
359
360 for (auto &Operands : MultiNodeOps) {
361 FinalOrder.push_back({Operands.first, {Operands.second[0]}});
363 Instruction::Load)
364 Mode.push_back(OpMode::Load);
365 else
366 Mode.push_back(OpMode::Opcode);
367 }
368
369 for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
370 LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
371 SmallPtrSet<VPValue *, 4> Candidates;
373 for (auto Ops : MultiNodeOps) {
376 << " ");
377 Candidates.insert(Ops.second[Lane]);
378 }
380
381 for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
383 if (Mode[Op] == OpMode::Failed)
384 continue;
385
386 VPValue *Last = FinalOrder[Op].second[Lane - 1];
387 std::pair<OpMode, VPValue *> Res =
388 getBest(Mode[Op], Last, Candidates, IAI);
389 if (Res.second)
390 FinalOrder[Op].second.push_back(Res.second);
391 else
392
393 FinalOrder[Op].second.push_back(markFailed());
394 }
395 }
396
397 return FinalOrder;
398}
399
400#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
402 dbgs() << " Ops: ";
403 for (auto *Op : Values) {
405 if (auto *Instr = VPInstr->getUnderlyingInstr()) {
407 continue;
408 }
409 dbgs() << " nullptr | ";
410 }
411 dbgs() << "\n";
412}
413#endif
414
416 assert(!Values.empty() && "Need some operands!");
417
418
419 auto I = BundleToCombined.find(to_vector<4>(Values));
420 if (I != BundleToCombined.end()) {
421#ifndef NDEBUG
422
423
424
425 for (auto *V : Values) {
426 auto UI = V->user_begin();
427 auto *FirstUser = *UI++;
428 while (UI != V->user_end()) {
429 assert(*UI == FirstUser && "Currently we only support SLP trees.");
430 UI++;
431 }
432 }
433#endif
434 return I->second;
435 }
436
437
439 dbgs() << "buildGraph: ";
440 dumpBundle(Values);
441 });
442
443 if (!areVectorizable(Values))
444 return markFailed();
445
446 assert(getOpcode(Values) && "Opcodes for all values must match");
447 unsigned ValuesOpcode = *getOpcode(Values);
448
451 bool MultiNodeRoot = !MultiNodeActive;
452 MultiNodeActive = true;
453 for (auto &Operands : getOperands(Values)) {
455 dbgs() << " Visiting Commutative";
456 dumpBundle(Operands);
457 });
458
459 auto OperandsOpcode = getOpcode(Operands);
460 if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
461 LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
463 } else {
465
466
470 MultiNodeOps.emplace_back(Op, Operands);
471 }
472 }
473
474 if (MultiNodeRoot) {
476 MultiNodeActive = false;
477
478 auto FinalOrder = reorderMultiNodeOps();
479
480 MultiNodeOps.clear();
481 for (auto &Ops : FinalOrder) {
483 Ops.first->replaceAllUsesWith(NewOp);
484 for (unsigned i = 0; i < CombinedOperands.size(); i++)
485 if (CombinedOperands[i] == Ops.first)
486 CombinedOperands[i] = NewOp;
487 delete Ops.first;
488 Ops.first = NewOp;
489 }
491 }
492 } else {
494 if (ValuesOpcode == Instruction::Load)
495 for (VPValue *V : Values)
497 else
498 for (auto &Operands : getOperands(Values))
500 }
501
502 unsigned Opcode;
503 switch (ValuesOpcode) {
504 case Instruction::Load:
506 break;
507 case Instruction::Store:
509 break;
510 default:
511 Opcode = ValuesOpcode;
512 break;
513 }
514
515 if (!CompletelySLP)
516 return markFailed();
517
518 assert(CombinedOperands.size() > 0 && "Need more some operands");
520 auto *VPI =
521 new VPInstruction(Opcode, CombinedOperands, {}, {}, Inst->getDebugLoc());
522
523 LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " " << Values[0]
524 << "\n");
525 addCombined(Values, VPI);
526 return VPI;
527}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallVector class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static unsigned LookaheadMaxDepth
Definition VPlanSLP.cpp:43
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)
Returns true if A and B access sequential memory if they are loads or stores or if they have identica...
Definition VPlanSLP.cpp:258
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)
Implements getLAScore from Listing 7 in the paper.
Definition VPlanSLP.cpp:274
static bool areCommutative(ArrayRef< VPValue * > Values)
Definition VPlanSLP.cpp:221
static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue * > Values, unsigned OperandIndex)
Definition VPlanSLP.cpp:210
This file contains the declarations for VPlan-based SLP.
This file contains the declarations of the entities induced by Vectorization Plans,...
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Drive the analysis of interleaved memory accesses in the loop.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
This is a concrete Recipe that models a single VPlan-level instruction.
LLVM_ABI_FOR_TEST VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
Definition VPlanSLP.cpp:92
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
LLVM_ABI_FOR_TEST VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition VPlanSLP.cpp:415
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
NodeAddr< InstrNode * > Instr
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.