LLVM: lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp Source File
//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR just before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"
#include <numeric>

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;

// Scalar load widening needs running after load-store-vectorizer as that pass
// doesn't handle overlapping cases. In addition, this pass enhances the
// widening to handle cases where scalar sub-dword loads are naturally aligned
// only but not dword aligned.
static cl::opt<bool>
    WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
               cl::desc("Widen sub-dword constant address space loads in "
                        "AMDGPULateCodeGenPrepare"),
               cl::ReallyHidden, cl::init(true));
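
// [Illustrative note, not in the original source] Like any cl::opt, the flag
// above remains settable from the command line even though it is ReallyHidden;
// a plausible way to disable the widening while debugging would be, e.g.:
//   llc -amdgpu-late-codegenprepare-widen-constant-loads=0 ...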
namespace {

class AMDGPULateCodeGenPrepare
    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Function &F;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  AssumptionCache *const AC;
  const UniformityInfo &UA;

  SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
                           AssumptionCache *AC, const UniformityInfo &UA)
      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
  bool run();
  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, DL, AC);
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};

using ValueToValueMap = DenseMap<const Value *, Value *>;

class LiveRegOptimizer {
private:
  Module &Mod;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  /// The scalar type to convert to.
  Type *const ConvertToScalar;
  /// Map of Value -> Converted Value.
  ValueToValueMap ValMap;
  /// Per-basic-block cache of conversions back from the optimal type.
  DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

public:
  /// Calculate and return the type to convert to given a problematic \p
  /// OriginalType.
  Type *calculateConvertType(Type *OriginalType);

  /// Convert the virtual register defined by \p V to the compatible vector of
  /// legal type.
  Value *convertToOptType(Instruction *V, BasicBlock::iterator &InsertPt);

  /// Convert the virtual register defined by \p V back to the original type
  /// \p ConvertType, stripping away the MSBs in cases where there was an
  /// imperfect fit (e.g. v2i32 -> v7i8).
  Value *convertFromOptType(Type *ConvertType, Instruction *V,
                            BasicBlock::iterator &InsertPt,
                            BasicBlock *InsertBlock);

  /// Check for problematic PHI nodes or cross-basic-block values based on the
  /// value defined by \p I, and coerce to legal types if necessary. For a
  /// problematic PHI node, we coerce all incoming values in a single
  /// invocation.
  bool optimizeLiveType(Instruction *I,
                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  // Whether or not the type should be replaced to avoid inefficient
  // legalization code.
  bool shouldReplace(Type *ITy) {
    FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
    if (!VTy)
      return false;

    const auto *TLI = ST.getTargetLowering();

    Type *EltTy = VTy->getElementType();
    // If the element size is not less than the convert-to-scalar size, then
    // we can't do any bit packing.
    if (!EltTy->isIntegerTy() ||
        EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
      return false;

    // Only coerce illegal types.
    TargetLoweringBase::LegalizeKind LK =
        TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
    return LK.first != TargetLoweringBase::TypeLegal;
  }
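
  // [Illustrative example, not in the original source] With ConvertToScalar =
  // i32 (set in the constructor below), shouldReplace typically behaves like:
  //   <4 x i8>    -> true  (i8 is not a legal scalar type here)
  //   <2 x i16>   -> depends on how the subtarget legalizes i16
  //   <2 x i32>   -> false (i32 is already legal)
  //   <4 x float> -> false (not an integer element type)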

  bool isOpLegal(Instruction *Op) { return isa<StoreInst, IntrinsicInst>(Op); }

  bool isCoercionProfitable(Instruction *II) {
    SmallPtrSet<Instruction *, 4> CVisited;
    SmallVector<Instruction *, 4> UserList;

    // Check users for profitable conditions (across-block users which can
    // natively handle the illegal vector).
    for (User *V : II->users())
      if (auto *UseInst = dyn_cast<Instruction>(V))
        UserList.push_back(UseInst);

    auto IsLookThru = [](Instruction *II) {
      if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
        return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
      return isa<PHINode, ShuffleVectorInst, InsertElementInst,
                 ExtractElementInst, CastInst>(II);
    };

    while (!UserList.empty()) {
      auto *CII = UserList.pop_back_val();
      if (!CVisited.insert(CII).second)
        continue;

      // Ignore same-block users that neither look through the value nor can
      // handle it natively.
      if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
          !isOpLegal(CII))
        continue;

      if (isOpLegal(CII))
        return true;

      if (IsLookThru(CII))
        for (User *V : CII->users())
          if (auto *UseInst = dyn_cast<Instruction>(V))
            UserList.push_back(UseInst);
    }
    return false;
  }
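
  // [Illustrative example, not in the original source] Coercion is treated as
  // profitable when the value eventually reaches a store or intrinsic
  // (isOpLegal above), possibly through look-through ops (phis, shuffles,
  // inserts/extracts, casts, llvm.amdgcn.perm):
  //   bb0:  %v = ... <4 x i8>         ; def
  //   bb1:  store <4 x i8> %v, ...    ; cross-block store user -> profitable
  // A value consumed only by same-block arithmetic is left untouched.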

  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
};

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::run() {
  // "Optimize" the virtual registers that cross basic block boundaries. When
  // building the SelectionDAG, vectors of illegal types that cross basic
  // blocks will be scalarized and widened, with each scalar living in its own
  // register. To work around this, this optimization converts the vectors to
  // equivalent vectors of legal type (which are converted back before uses in
  // subsequent blocks), to pack the bits into fewer physical registers (used
  // in CopyToReg/CopyFromReg pairs).
  LiveRegOptimizer LRO(*F.getParent(), ST);

  bool Changed = false;

  bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

  for (auto &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB))) {
      Changed |= !HasScalarSubwordLoads && visit(I);
      Changed |= LRO.optimizeLiveType(&I, DeadInsts);
    }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return Changed;
}

Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
  assert(OriginalType->getScalarSizeInBits() <=
         ConvertToScalar->getScalarSizeInBits());

  FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
  unsigned ConvertEltCount =
      (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

  if (OriginalSize <= ConvertScalarSize)
    return IntegerType::get(Mod.getContext(), ConvertScalarSize);

  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                         ConvertEltCount, false);
}
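
// [Illustrative example, not in the original source] With a 32-bit
// ConvertToScalar and the usual DataLayout sizes:
//   <4 x i8>  (32 bits) -> i32
//   <6 x i8>  (48 bits) -> <2 x i32> (rounded up to 64 bits)
//   <3 x i16> (48 bits) -> <2 x i32>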

Value *LiveRegOptimizer::convertToOptType(Instruction *V,
                                          BasicBlock::iterator &InsertPt) {
  FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
  Type *NewTy = calculateConvertType(V->getType());

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

  IRBuilder<> Builder(V->getParent(), InsertPt);
  // If there is a bitsize match, we can fit the old vector into a new vector
  // of desired type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, we must use a wider vector.
  assert(NewSize > OriginalSize);
  uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();

  SmallVector<int, 8> ShuffleMask;
  uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
  for (unsigned I = 0; I < OriginalElementCount; I++)
    ShuffleMask.push_back(I);

  for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
    ShuffleMask.push_back(OriginalElementCount);

  Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
  return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
}
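
// [Illustrative IR sketch, not in the original source] Coercing a <3 x i8>
// (24 bits) def into an i32 pads the vector, then bitcasts:
//   %v.ext = shufflevector <3 x i8> %v, <3 x i8> poison,
//            <4 x i32> <i32 0, i32 1, i32 2, i32 3>   ; one padding element
//   %v.bc  = bitcast <4 x i8> %v.ext to i32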

Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                            BasicBlock::iterator &InsertPt,
                                            BasicBlock *InsertBB) {
  FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

  IRBuilder<> Builder(InsertBB, InsertPt);
  // If there is a bitsize match, we simply convert back to the original type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, then we must have used a wider value to
  // hold the bits.
  assert(OriginalSize > NewSize);
  // For wide scalars, we can just truncate the value.
  if (!V->getType()->isVectorTy()) {
    Instruction *Trunc = cast<Instruction>(
        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize)));
    return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
  }

  // For wide vectors, we must strip the MSBs to convert back to the original
  // type.
  VectorType *ExpandedVT = VectorType::get(
      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
      (OriginalSize / NewVTy->getScalarSizeInBits()), false);
  Instruction *Converted =
      cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));

  unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
  SmallVector<int, 8> ShuffleMask(NarrowElementCount);
  std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

  return Builder.CreateShuffleVector(Converted, ShuffleMask);
}
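
// [Illustrative IR sketch, not in the original source] Converting an i32 back
// to <3 x i8> takes the scalar path:
//   %t = trunc i32 %v to i24
//   %r = bitcast i24 %t to <3 x i8>
// while a <2 x i32> back to <7 x i8> takes the vector path:
//   %e = bitcast <2 x i32> %v to <8 x i8>
//   %r = shufflevector <8 x i8> %e, <8 x i8> poison,
//        <7 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>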

bool LiveRegOptimizer::optimizeLiveType(
    Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<PHINode *, 4> PhiNodes;
  SmallPtrSet<Instruction *, 4> Defs;
  SmallPtrSet<Instruction *, 4> Uses;
  SmallPtrSet<Instruction *, 4> Visited;

  Worklist.push_back(cast<Instruction>(I));
  while (!Worklist.empty()) {
    Instruction *II = Worklist.pop_back_val();

    if (!Visited.insert(II).second)
      continue;

    if (!shouldReplace(II->getType()))
      continue;

    if (!isCoercionProfitable(II))
      continue;

    if (PHINode *Phi = dyn_cast<PHINode>(II)) {
      PhiNodes.insert(Phi);
      // Collect all the incoming values of problematic PHI nodes.
      for (Value *V : Phi->incoming_values()) {
        // Repeat the collection process for newly found PHI nodes.
        if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
          if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
            Worklist.push_back(OpPhi);
          continue;
        }

        Instruction *IncInst = dyn_cast<Instruction>(V);
        // Other incoming value types (e.g. vector literals) are unhandled.
        if (!IncInst && !isa<ConstantAggregateZero>(V))
          return false;

        // Collect all other incoming values for coercion.
        if (IncInst)
          Defs.insert(IncInst);
      }
    }

    // Collect all relevant uses.
    for (User *V : II->users()) {
      // Repeat the collection process for newly found PHI nodes.
      if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
        if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
          Worklist.push_back(OpPhi);
        continue;
      }

      Instruction *UseInst = cast<Instruction>(V);
      // Collect all uses of PHINodes and any use that crosses a BB boundary.
      if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
        Uses.insert(UseInst);
        if (!Defs.count(II) && !isa<PHINode>(II))
          Defs.insert(II);
      }
    }
  }

  // Coerce and track the defs.
  for (Instruction *D : Defs) {
    if (!ValMap.contains(D)) {
      BasicBlock::iterator InsertPt = std::next(D->getIterator());
      Value *ConvertVal = convertToOptType(D, InsertPt);
      assert(ConvertVal);
      ValMap[D] = ConvertVal;
    }
  }

  // Construct new-typed PHI nodes.
  for (PHINode *Phi : PhiNodes) {
    ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
                                  Phi->getNumIncomingValues(),
                                  Phi->getName() + ".tc", Phi->getIterator());
  }

  // Connect all the PHI nodes with their new incoming values.
  for (PHINode *Phi : PhiNodes) {
    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
    bool MissingIncVal = false;
    for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
      Value *IncVal = Phi->getIncomingValue(I);
      if (isa<ConstantAggregateZero>(IncVal)) {
        Type *NewType = calculateConvertType(Phi->getType());
        NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
                            Phi->getIncomingBlock(I));
      } else if (Value *Val = ValMap.lookup(IncVal))
        NewPhi->addIncoming(Val, Phi->getIncomingBlock(I));
      else
        MissingIncVal = true;
    }
    if (MissingIncVal) {
      Value *DeadVal = ValMap[Phi];
      // The coercion chain of the PHI is broken. Delete the Phi from the
      // ValMap and any connected / user Phis.
      SmallVector<Value *, 4> PHIWorklist;
      SmallPtrSet<Value *, 4> VisitedPhis;
      PHIWorklist.push_back(DeadVal);
      while (!PHIWorklist.empty()) {
        Value *NextDeadValue = PHIWorklist.pop_back_val();
        VisitedPhis.insert(NextDeadValue);
        auto OriginalPhi =
            llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {
              return ValMap[CandPhi] == NextDeadValue;
            });
        // This PHI may have already been removed from maps when unwinding a
        // previous Phi.
        if (OriginalPhi != PhiNodes.end())
          ValMap.erase(*OriginalPhi);

        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));

        for (User *U : NextDeadValue->users()) {
          if (!VisitedPhis.contains(cast<PHINode>(U)))
            PHIWorklist.push_back(U);
        }
      }
    } else {
      DeadInsts.emplace_back(cast<Instruction>(Phi));
    }
  }

  // Coerce back to the original type and replace the uses.
  for (Instruction *U : Uses) {
    // Replace all converted operands of the use.
    for (auto [OpIdx, Op] : enumerate(U->operands())) {
      if (Value *Val = ValMap.lookup(Op)) {
        Value *NewVal = nullptr;
        if (BBUseValMap.contains(U->getParent()) &&
            BBUseValMap[U->getParent()].contains(Val))
          NewVal = BBUseValMap[U->getParent()][Val];
        else {
          BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
          // We may pick up ops that were previously converted for users in
          // other blocks. If there is an originally typed definition of the Op
          // already in this block, simply reuse it.
          if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
              U->getParent() == cast<Instruction>(Op)->getParent()) {
            NewVal = Op;
          } else {
            NewVal = convertFromOptType(Op->getType(),
                                        cast<Instruction>(Val),
                                        InsertPt, U->getParent());
            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
          }
        }
        assert(NewVal);
        U->setOperand(OpIdx, NewVal);
      }
    }
  }

  return true;
}
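
// [Illustrative IR sketch, not in the original source; value names are
// invented] Overall effect on a cross-block <4 x i8> PHI:
//   before:  %p = phi <4 x i8> [ %a, %bb0 ], [ %b, %bb1 ]
//   after:   %a.bc = bitcast <4 x i8> %a to i32        ; after def in %bb0
//            %b.bc = bitcast <4 x i8> %b to i32        ; after def in %bb1
//            %p.tc = phi i32 [ %a.bc, %bb0 ], [ %b.bc, %bb1 ]
//            %p.bc = bitcast i32 %p.tc to <4 x i8>     ; before first use
// so the value crossing the blocks lives in a single 32-bit register.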

bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address space.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  Type *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL.getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // That load must be at least naturally aligned.
  if (LI.getAlign() < DL.getABITypeAlign(Ty))
    return false;
  // It should be uniform, i.e. a scalar load.
  return UA.isUniform(&LI);
}

bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip if that load is already aligned on DWORD at least as it's handled in
  // SDAG.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
  // If that base is not DWORD aligned, it's not safe to perform the following
  // transforms.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original alignment could be promoted with a
    // better one.
    LI.setAlignment(Align(4));
    return true;
  }

  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
  auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

  unsigned ShAmt = Adjust * 8;
  Value *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),
                      DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy
                                                               : LI.getType()),
      LI.getType());
  LI.replaceAllUsesWith(NewVal);
  DeadInsts.emplace_back(&LI);

  return true;
}
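
// [Illustrative IR sketch, not in the original source] Widening a sub-dword
// constant-address load whose pointer is a DWORD-aligned base plus offset 5
// (so Adjust = 1):
//   before: %v = load i8, ptr addrspace(4) %p, align 1
//   after:  %gep = getelementptr i8, ptr addrspace(4) %base, i64 4
//           %w   = load i32, ptr addrspace(4) %gep, align 4
//           %s   = lshr i32 %w, 8
//           %v   = trunc i32 %s to i8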

PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
  static char ID;

  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.setPreservesCFG();
  }

  bool runOnFunction(Function &F) override;
};

bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo &UI =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
  return new AMDGPULateCodeGenPrepareLegacy();
}
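
// [Usage note, assuming the pass is registered under its DEBUG_TYPE name in
// AMDGPUPassRegistry.def] The pass normally runs as part of the AMDGPU
// codegen pipeline, but it can be exercised in isolation for testing, e.g.:
//   opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 \
//       -passes=amdgpu-late-codegenprepare -S input.ll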