LLVM: lib/Target/AArch64/SVEIntrinsicOpts.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#include "AArch64.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

42#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
43
44namespace {
45struct SVEIntrinsicOpts : public ModulePass {
46 static char ID;
48
49 bool runOnModule(Module &M) override;
50 void getAnalysisUsage(AnalysisUsage &AU) const override;
51
52private:
53 bool coalescePTrueIntrinsicCalls(BasicBlock &BB,
58
60
61
62
64};
65}
66
67void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
68 AU.addRequired();
70}
71
72char SVEIntrinsicOpts::ID = 0;
73static const char *name = "SVE intrinsics optimizations";
77
79 return new SVEIntrinsicOpts();
80}
81
82
83
84
85
86
87
88
89
90
91
92
93
95
96
101 }
102 }
103
104
105 if (ConvertToUses.empty())
106 return false;
107
108
109
110
112 for (IntrinsicInst *ConvertToUse : ConvertToUses) {
115 if (IntrUser && IntrUser->getIntrinsicID() ==
116 Intrinsic::aarch64_sve_convert_from_svbool) {
118
119
120 if (IntrUserVTy->getElementCount().getKnownMinValue() >
121 PTrueVTy->getElementCount().getKnownMinValue())
122
123 return true;
124 }
125 }
126 }
127
128
129 return false;
130}
131
132
133bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
134 BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) {
135 if (PTrues.size() <= 1)
136 return false;
137
138
139 auto *MostEncompassingPTrue =
143 return PTrue1VTy->getElementCount().getKnownMinValue() <
144 PTrue2VTy->getElementCount().getKnownMinValue();
145 });
146
147
148
149 PTrues.remove(MostEncompassingPTrue);
151
152
153
154
156
159 Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());
160
161 auto *MostEncompassingPTrueVTy =
163 auto *ConvertToSVBool = Builder.CreateIntrinsic(
164 Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
165 {MostEncompassingPTrue});
166
167 bool ConvertFromCreated = false;
168 for (auto *PTrue : PTrues) {
170
171
172
173 if (MostEncompassingPTrueVTy != PTrueVTy) {
174 ConvertFromCreated = true;
175
176 Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
177 auto *ConvertFromSVBool =
178 Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
179 {PTrueVTy}, {ConvertToSVBool});
180 PTrue->replaceAllUsesWith(ConvertFromSVBool);
181 } else
182 PTrue->replaceAllUsesWith(MostEncompassingPTrue);
183
184 PTrue->eraseFromParent();
185 }
186
187
188 if (!ConvertFromCreated)
189 ConvertToSVBool->eraseFromParent();
190
191 return true;
192}
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
243 SmallSetVector<Function *, 4> &Functions) {
245
246 for (auto *F : Functions) {
247 for (auto &BB : *F) {
248 SmallSetVector<IntrinsicInst *, 4> SVAllPTrues;
249 SmallSetVector<IntrinsicInst *, 4> SVPow2PTrues;
250
251
252 for (Instruction &I : BB) {
253 if (I.use_empty())
254 continue;
255
257 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
258 continue;
259
260 const auto PTruePattern =
262
263 if (PTruePattern == AArch64SVEPredPattern::all)
264 SVAllPTrues.insert(IntrI);
265 if (PTruePattern == AArch64SVEPredPattern::pow2)
266 SVPow2PTrues.insert(IntrI);
267 }
268
269 Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);
270 Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);
271 }
272 }
273
275}
276
277
278
279bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
280 auto *F = I->getFunction();
281 auto Attr = F->getFnAttribute(Attribute::VScaleRange);
282 if (!Attr.isValid())
283 return false;
284
285 unsigned MinVScale = Attr.getVScaleRangeMin();
286 std::optional MaxVScale = Attr.getVScaleRangeMax();
287
288 if (!MaxVScale || MinVScale != MaxVScale)
289 return false;
290
291 auto *PredType =
293 auto *FixedPredType =
295
296
298 if (!Store || ->isSimple())
299 return false;
300
301
302 if (Store->getOperand(0)->getType() != FixedPredType)
303 return false;
304
305
307 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract)
308 return false;
309
310
312 return false;
313
314
316 if (!BitCast)
317 return false;
318
319
320 if (BitCast->getOperand(0)->getType() != PredType)
321 return false;
322
324 Builder.SetInsertPoint(I);
325
326 Builder.CreateStore(BitCast->getOperand(0), Store->getPointerOperand());
327
328 Store->eraseFromParent();
329 if (IntrI->use_empty())
330 IntrI->eraseFromParent();
331 if (BitCast->use_empty())
332 BitCast->eraseFromParent();
333
334 return true;
335}
336
337
338
339bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
340 auto *F = I->getFunction();
341 auto Attr = F->getFnAttribute(Attribute::VScaleRange);
342 if (!Attr.isValid())
343 return false;
344
345 unsigned MinVScale = Attr.getVScaleRangeMin();
346 std::optional MaxVScale = Attr.getVScaleRangeMax();
347
348 if (!MaxVScale || MinVScale != MaxVScale)
349 return false;
350
351 auto *PredType =
353 auto *FixedPredType =
355
356
358 if (!BitCast || BitCast->getType() != PredType)
359 return false;
360
361
363 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert)
364 return false;
365
366
369 return false;
370
371
373 if (!Load || ->isSimple())
374 return false;
375
376
377 if (Load->getType() != FixedPredType)
378 return false;
379
381 Builder.SetInsertPoint(Load);
382
383 auto *LoadPred = Builder.CreateLoad(PredType, Load->getPointerOperand());
384
385 BitCast->replaceAllUsesWith(LoadPred);
386 BitCast->eraseFromParent();
387 if (IntrI->use_empty())
388 IntrI->eraseFromParent();
389 if (Load->use_empty())
390 Load->eraseFromParent();
391
392 return true;
393}
394
395bool SVEIntrinsicOpts::optimizeInstructions(
396 SmallSetVector<Function *, 4> &Functions) {
398
399 for (auto *F : Functions) {
400 DominatorTree *DT = &getAnalysis(*F).getDomTree();
401
402
403
405 ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
406 for (auto *BB : RPOT) {
408 switch (I.getOpcode()) {
409 case Instruction::Store:
410 Changed |= optimizePredicateStore(&I);
411 break;
412 case Instruction::BitCast:
413 Changed |= optimizePredicateLoad(&I);
414 break;
415 }
416 }
417 }
418 }
419
421}
422
423bool SVEIntrinsicOpts::optimizeFunctions(
424 SmallSetVector<Function *, 4> &Functions) {
426
427 Changed |= optimizePTrueIntrinsicCalls(Functions);
428 Changed |= optimizeInstructions(Functions);
429
431}
432
433bool SVEIntrinsicOpts::runOnModule(Module &M) {
435 SmallSetVector<Function *, 4> Functions;
436
437
438
439
440 for (auto &F : M.getFunctionList()) {
441 if (.isDeclaration())
442 continue;
443
444 switch (F.getIntrinsicID()) {
445 case Intrinsic::vector_extract:
446 case Intrinsic::vector_insert:
447 case Intrinsic::aarch64_sve_ptrue:
448 for (User *U : F.users())
450 break;
451 default:
452 break;
453 }
454 }
455
456 if (!Functions.empty())
457 Changed |= optimizeFunctions(Functions);
458
460}
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool isPTruePromoted(IntrinsicInst *PTrue)
Checks if a ptrue intrinsic call is promoted.
Definition SVEIntrinsicOpts.cpp:94
This file implements a set that has insertion order iteration characteristics.
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Legacy analysis pass which computes a DominatorTree.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
bool remove(const value_type &X)
Remove an item from the set vector.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
size_type size() const
Determine the number of elements in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ModulePass * createSVEIntrinsicOptsPass()
Definition SVEIntrinsicOpts.cpp:78
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.