LLVM: lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
24#include "llvm/IR/IntrinsicsAMDGPU.h"
28
29#define DEBUG_TYPE "amdgpu-lower-kernel-attributes"
30
31using namespace llvm;
32
33namespace {
34
35
// Constants used as `case` labels when classifying loads on the
// pre-code-object-v5 path (the `else` arm of `if (IsV5OrAbove)`), where the
// base pointer intrinsic is amdgcn_dispatch_ptr.
// NOTE(review): presumably these are byte offsets from that pointer; the
// `switch` header itself is not visible in this listing -- confirm.
36enum DispatchPackedOffsets {
// Matched only against loads whose store size is 2 bytes.
37 WORKGROUP_SIZE_X = 4,
38 WORKGROUP_SIZE_Y = 6,
39 WORKGROUP_SIZE_Z = 8,
40
// Matched only against loads whose store size is 4 bytes.
41 GRID_SIZE_X = 12,
42 GRID_SIZE_Y = 16,
43 GRID_SIZE_Z = 20
44};
45
46
// Constants used as `case` labels when classifying loads on the
// code-object-v5-or-newer path (`if (IsV5OrAbove)`), where the base pointer
// intrinsic is amdgcn_implicitarg_ptr.
// NOTE(review): presumably these are byte offsets from that pointer; the
// `switch` header itself is not visible in this listing -- confirm.
47enum ImplicitArgOffsets {
// Matched only against loads whose store size is 4 bytes.
48 HIDDEN_BLOCK_COUNT_X = 0,
49 HIDDEN_BLOCK_COUNT_Y = 4,
50 HIDDEN_BLOCK_COUNT_Z = 8,
51
// Matched only against loads whose store size is 2 bytes.
52 HIDDEN_GROUP_SIZE_X = 12,
53 HIDDEN_GROUP_SIZE_Y = 14,
54 HIDDEN_GROUP_SIZE_Z = 16,
55
// Matched only against loads whose store size is 2 bytes.
56 HIDDEN_REMAINDER_X = 18,
57 HIDDEN_REMAINDER_Y = 20,
58 HIDDEN_REMAINDER_Z = 22,
59};
60
/// Legacy pass manager wrapper: a ModulePass whose work is done in
/// runOnModule(), defined out of line.
61class AMDGPULowerKernelAttributes : public ModulePass {
62public:
// Pass identification member handed to the ModulePass constructor; defined
// out of line as 0.
63 static char ID;
64
65 AMDGPULowerKernelAttributes() : ModulePass(ID) {}
66
// Returns true if the module was changed.
67 bool runOnModule(Module &M) override;
68
// Human-readable pass name.
69 StringRef getPassName() const override {
70 return "AMDGPU Kernel Attributes";
71 }
72
// NOTE(review): the body of this override (original line 74) is missing
// from this listing. Presumably it calls AU.setPreservesAll() -- confirm
// against the real source.
73 void getAnalysisUsage(AnalysisUsage &AU) const override {
75 }
76};
77
/// Chooses the intrinsic that serves as the base pointer for the loads this
/// pass inspects: amdgcn_implicitarg_ptr when \p IsV5OrAbove is true,
/// otherwise amdgcn_dispatch_ptr.
///
/// NOTE(review): the return statement (original line 81) is missing from
/// this listing. Both visible callers null-check the result, so it
/// presumably returns the intrinsic's declaration in \p M only if one
/// already exists (nullptr otherwise) -- confirm against the real source.
78Function *getBasePtrIntrinsic(Module &M, bool IsV5OrAbove) {
79 auto IntrinsicId = IsV5OrAbove ? Intrinsic::amdgcn_implicitarg_ptr
80 : Intrinsic::amdgcn_dispatch_ptr;
82}
83
84}
85
88 if (MaxNumGroups == 0 || MaxNumGroups == std::numeric_limits<uint32_t>::max())
89 return;
90
91 if (!Load->getType()->isIntegerTy(32))
92 return;
93
94
95 MDBuilder MDB(Load->getContext());
97 Load->setMetadata(LLVMContext::MD_range, Range);
98}
99
102
103 auto *MD = F->getMetadata("reqd_work_group_size");
104 const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3;
105
106 const bool HasUniformWorkGroupSize =
107 F->getFnAttribute("uniform-work-group-size").getValueAsBool();
108
111 3, 0);
112
113 if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize &&
114 none_of(MaxNumWorkgroups, [](unsigned X) { return X != 0; }))
115 return false;
116
117 Value *BlockCounts[3] = {nullptr, nullptr, nullptr};
118 Value *GroupSizes[3] = {nullptr, nullptr, nullptr};
119 Value *Remainders[3] = {nullptr, nullptr, nullptr};
120 Value *GridSizes[3] = {nullptr, nullptr, nullptr};
121
123
124
125
127 if (!U->hasOneUse())
128 continue;
129
131 auto *Load = dyn_cast(U);
133 if (!Load && !BCI) {
135 continue;
138 }
139
140 if (BCI) {
141 if (!BCI->hasOneUse())
142 continue;
144 }
145
146 if (!Load || !Load->isSimple())
147 continue;
148
149 unsigned LoadSize = DL.getTypeStoreSize(Load->getType());
150
151
152 if (IsV5OrAbove) {
154 case HIDDEN_BLOCK_COUNT_X:
155 if (LoadSize == 4) {
156 BlockCounts[0] = Load;
158 }
159 break;
160 case HIDDEN_BLOCK_COUNT_Y:
161 if (LoadSize == 4) {
162 BlockCounts[1] = Load;
164 }
165 break;
166 case HIDDEN_BLOCK_COUNT_Z:
167 if (LoadSize == 4) {
168 BlockCounts[2] = Load;
170 }
171 break;
172 case HIDDEN_GROUP_SIZE_X:
173 if (LoadSize == 2)
174 GroupSizes[0] = Load;
175 break;
176 case HIDDEN_GROUP_SIZE_Y:
177 if (LoadSize == 2)
178 GroupSizes[1] = Load;
179 break;
180 case HIDDEN_GROUP_SIZE_Z:
181 if (LoadSize == 2)
182 GroupSizes[2] = Load;
183 break;
184 case HIDDEN_REMAINDER_X:
185 if (LoadSize == 2)
186 Remainders[0] = Load;
187 break;
188 case HIDDEN_REMAINDER_Y:
189 if (LoadSize == 2)
190 Remainders[1] = Load;
191 break;
192 case HIDDEN_REMAINDER_Z:
193 if (LoadSize == 2)
194 Remainders[2] = Load;
195 break;
196 default:
197 break;
198 }
199 } else {
201 case WORKGROUP_SIZE_X:
202 if (LoadSize == 2)
203 GroupSizes[0] = Load;
204 break;
205 case WORKGROUP_SIZE_Y:
206 if (LoadSize == 2)
207 GroupSizes[1] = Load;
208 break;
209 case WORKGROUP_SIZE_Z:
210 if (LoadSize == 2)
211 GroupSizes[2] = Load;
212 break;
213 case GRID_SIZE_X:
214 if (LoadSize == 4)
215 GridSizes[0] = Load;
216 break;
217 case GRID_SIZE_Y:
218 if (LoadSize == 4)
219 GridSizes[1] = Load;
220 break;
221 case GRID_SIZE_Z:
222 if (LoadSize == 4)
223 GridSizes[2] = Load;
224 break;
225 default:
226 break;
227 }
228 }
229 }
230
231 bool MadeChange = false;
232 if (IsV5OrAbove && HasUniformWorkGroupSize) {
233
234
235
236
237
238
239
240 for (int I = 0; I < 3; ++I) {
241 Value *BlockCount = BlockCounts[I];
242 if (!BlockCount)
243 continue;
244
246 auto GroupIDIntrin =
250
251 for (User *ICmp : BlockCount->users()) {
255 MadeChange = true;
256 }
257 }
258 }
259
260
261 for (Value *Remainder : Remainders) {
262 if (!Remainder)
263 continue;
265 MadeChange = true;
266 }
267 } else if (HasUniformWorkGroupSize) {
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287 for (int I = 0; I < 3; ++I) {
288 Value *GroupSize = GroupSizes[I];
289 Value *GridSize = GridSizes[I];
290 if (!GroupSize || !GridSize)
291 continue;
292
294 auto GroupIDIntrin =
298
299 for (User *U : GroupSize->users()) {
301 if (!ZextGroupSize)
302 continue;
303
304 for (User *UMin : ZextGroupSize->users()) {
309 if (HasReqdWorkGroupSize) {
313 KnownSize, UMin->getType(), false, DL));
314 } else {
315 UMin->replaceAllUsesWith(ZextGroupSize);
316 }
317
318 MadeChange = true;
319 }
320 }
321 }
322 }
323 }
324
325
326 if (!HasReqdWorkGroupSize)
327 return MadeChange;
328
329 for (int I = 0; I < 3; I++) {
330 Value *GroupSize = GroupSizes[I];
331 if (!GroupSize)
332 continue;
333
337 MadeChange = true;
338 }
339
340 return MadeChange;
341}
342
343
344
345
/// Legacy pass entry point. Walks every user of the base-pointer intrinsic
/// and reports whether anything in the module was changed.
///
/// NOTE(review): original lines 349-350 (initialisation of IsV5OrAbove and
/// BasePtr), 357 (definition of CI) and 359 are missing from this listing.
/// Line 359 presumably guards the `MadeChange = true` assignment with a call
/// to processUse(CI, IsV5OrAbove) -- confirm against the real source.
346bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
347 bool MadeChange = false;
348 bool IsV5OrAbove =
351
// Nothing to do when there is no base pointer to walk.
352 if (!BasePtr)
353 return false;
354
// Tracks instructions already visited so that each one is handled at most
// once, even if it shows up several times in the user list.
355 SmallPtrSet<Instruction *, 4> HandledUses;
356 for (auto *U : BasePtr->users()) {
// insert(...).second is true only the first time CI is seen.
358 if (HandledUses.insert(CI).second) {
360 MadeChange = true;
361 }
362 }
363
364 return MadeChange;
365}
366
367
369 "AMDGPU Kernel Attributes", false, false)
372
373char AMDGPULowerKernelAttributes::ID = 0;
374
376 return new AMDGPULowerKernelAttributes();
377}
378
381 bool IsV5OrAbove =
383 Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove);
384
385 if (!BasePtr)
387
392 }
393 }
394
396}
static void annotateGridSizeLoadWithRangeMD(LoadInst *Load, uint32_t MaxNumGroups)
Definition AMDGPULowerKernelAttributes.cpp:86
static bool processUse(CallInst *CI, bool IsV5OrAbove)
Definition AMDGPULowerKernelAttributes.cpp:100
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Class for arbitrary precision integers.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, along with methods for querying it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
unsigned getAMDHSACodeObjectVersion(const Module &M)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
ModulePass * createAMDGPULowerKernelAttributesPass()
Definition AMDGPULowerKernelAttributes.cpp:375
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition AMDGPULowerKernelAttributes.cpp:380