LLVM: lib/Target/ARM/MVELaneInterleavingPass.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
67#include
68
69using namespace llvm;
70
71#define DEBUG_TYPE "mve-laneinterleave"
72
75 cl::desc("Enable interleave MVE vector operation lowering"));
76
77namespace {
78
79class MVELaneInterleaving : public FunctionPass {
80public:
81 static char ID;
82
85 }
86
88
89 StringRef getPassName() const override { return "MVE lane interleaving"; }
90
91 void getAnalysisUsage(AnalysisUsage &AU) const override {
94 FunctionPass::getAnalysisUsage(AU);
95 }
96};
97
98}
99
100char MVELaneInterleaving::ID = 0;
101
103 false)
104
106 return new MVELaneInterleaving();
107}
108
111
112
113
114
115
116
117
118
119
120
121
122
123 for (auto *E : Exts) {
125 LLVM_DEBUG(dbgs() << "Beneficial due to " << *E << "\n");
126 return true;
127 }
128 }
129 for (auto *T : Truncs) {
131 LLVM_DEBUG(dbgs() << "Beneficial due to " << *T << "\n");
132 return true;
133 }
134 }
135
136
137
138 for (auto *E : Exts) {
139 if (->hasOneUse() ||
140 cast(*E->user_begin())->getOpcode() != Instruction::Mul) {
141 LLVM_DEBUG(dbgs() << "Not beneficial due to " << *E << "\n");
142 return false;
143 }
144 }
145 return true;
146}
147
150 LLVM_DEBUG(dbgs() << "tryInterleave from " << *Start << "\n");
151
153 return false;
154
155
156 std::vector<Instruction *> Worklist;
157 Worklist.push_back(Start);
159
165
166 while (!Worklist.empty()) {
168 Worklist.pop_back();
169
170 switch (I->getOpcode()) {
171
172 case Instruction::Trunc:
173 case Instruction::FPTrunc:
175 continue;
177 break;
178
179
180 case Instruction::SExt:
181 case Instruction::ZExt:
182 case Instruction::FPExt:
184 continue;
185 for (auto *Use : I->users())
188 break;
189
190 case Instruction::Call: {
192 if ()
193 return false;
194
195 if (II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
197 continue;
199 break;
200 }
201
202 switch (II->getIntrinsicID()) {
203 case Intrinsic::abs:
204 case Intrinsic::smin:
205 case Intrinsic::smax:
206 case Intrinsic::umin:
207 case Intrinsic::umax:
208 case Intrinsic::sadd_sat:
209 case Intrinsic::ssub_sat:
210 case Intrinsic::uadd_sat:
211 case Intrinsic::usub_sat:
212 case Intrinsic::minnum:
213 case Intrinsic::maxnum:
214 case Intrinsic::fabs:
215 case Intrinsic::fma:
216 case Intrinsic::ceil:
217 case Intrinsic::floor:
218 case Intrinsic::rint:
219 case Intrinsic::round:
220 case Intrinsic::trunc:
221 break;
222 default:
223 return false;
224 }
225 [[fallthrough]];
226 }
227
228 case Instruction::Add:
229 case Instruction::Sub:
230 case Instruction::Mul:
231 case Instruction::AShr:
232 case Instruction::LShr:
233 case Instruction::Shl:
234 case Instruction::ICmp:
235 case Instruction::FCmp:
236 case Instruction::FAdd:
237 case Instruction::FMul:
238 case Instruction::Select:
239 if (.insert(I))
240 continue;
241
242 for (Use &Op : I->operands()) {
244 continue;
247 else
249 }
250
251 for (auto *Use : I->users())
253 break;
254
255 case Instruction::ShuffleVector:
256
258 continue;
259 [[fallthrough]];
260
261 default:
262 LLVM_DEBUG(dbgs() << " Unhandled instruction: " << *I << "\n");
263 return false;
264 }
265 }
266
267 if (Exts.empty() && OtherLeafs.empty())
268 return false;
269
271 dbgs() << "Found group:\n Exts:\n";
272 for (auto *I : Exts)
273 dbgs() << " " << *I << "\n";
274 dbgs() << " Ops:\n";
276 dbgs() << " " << *I << "\n";
277 dbgs() << " OtherLeafs:\n";
278 for (auto *I : OtherLeafs)
279 dbgs() << " " << *I->get() << " of " << *I->getUser() << "\n";
280 dbgs() << " Truncs:\n";
281 for (auto *I : Truncs)
282 dbgs() << " " << *I << "\n";
283 dbgs() << " Reducts:\n";
284 for (auto *I : Reducts)
285 dbgs() << " " << *I << "\n";
286 });
287
289 "Expected some truncs or reductions");
291 return false;
292
293 auto *VT = !Truncs.empty()
297
298
299 unsigned NumElts = VT->getNumElements();
300 unsigned BaseElts = VT->getScalarSizeInBits() == 16
301 ? 8
302 : (VT->getScalarSizeInBits() == 8 ? 16 : 0);
303 if (BaseElts == 0 || NumElts % BaseElts != 0) {
305 return false;
306 }
307 if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=
308 VT->getScalarSizeInBits() * 2) {
310 return false;
311 }
313 if (I->getOperand(0)->getType() != VT) {
315 return false;
316 }
318 if (I->getType() != VT) {
320 return false;
321 }
322
323
325 return false;
327 return I->getOpcode() == Instruction::Mul ||
328 I->getOpcode() == Instruction::Select ||
329 I->getOpcode() == Instruction::ICmp;
330 }))) {
331 LLVM_DEBUG(dbgs() << "Reduction does not look profitable\n");
332 return false;
333 }
334
335
337
340
341
342 for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {
343 for (unsigned i = 0; i < BaseElts / 2; i++)
345 for (unsigned i = 0; i < BaseElts / 2; i++)
347 }
348 for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {
349 for (unsigned i = 0; i < BaseElts / 2; i++) {
352 }
353 }
354
357 Builder.SetInsertPoint(I);
358 Value *Shuffle = Builder.CreateShuffleVector(I->getOperand(0), LeafMask);
361 Value *Ext = FPext ? Builder.CreateFPExt(Shuffle, I->getType())
362 : Sext ? Builder.CreateSExt(Shuffle, I->getType())
363 : Builder.CreateZExt(Shuffle, I->getType());
364 I->replaceAllUsesWith(Ext);
366 }
367
368 for (Use *I : OtherLeafs) {
371 Value *Shuffle = Builder.CreateShuffleVector(I->get(), LeafMask);
372 I->getUser()->setOperand(I->getOperandNo(), Shuffle);
374 }
375
378
379 Builder.SetInsertPoint(I->getParent(), ++I->getIterator());
380 Value *Shuf = Builder.CreateShuffleVector(I, TruncMask);
381 I->replaceAllUsesWith(Shuf);
383
385 }
386
387 return true;
388}
389
390
391
394 return II->getIntrinsicID() == Intrinsic::vector_reduce_add;
395 return false;
396}
397
398bool MVELaneInterleaving::runOnFunction(Function &F) {
400 return false;
401 auto &TPC = getAnalysis();
402 auto &TM = TPC.getTM();
403 auto *ST = &TM.getSubtarget(F);
404 if (->hasMVEIntegerOps())
405 return false;
406
408
409 SmallPtrSet<Instruction *, 16> Visited;
411 if (((I.getType()->isVectorTy() &&
416 }
417
419}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isProfitableToInterleave(SmallSetVector< Instruction *, 4 > &Exts, SmallSetVector< Instruction *, 4 > &Truncs)
Definition MVELaneInterleavingPass.cpp:109
static bool tryInterleave(Instruction *Start, SmallPtrSetImpl< Instruction * > &Visited)
Definition MVELaneInterleavingPass.cpp:148
static cl::opt< bool > EnableInterleave("enable-mve-interleave", cl::Hidden, cl::init(true), cl::desc("Enable interleave MVE vector operation lowering"))
static bool isAddReduction(Instruction &I)
Definition MVELaneInterleavingPass.cpp:392
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Pass * createMVELaneInterleavingPass()
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
void initializeMVELaneInterleavingPass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.