LLVM: lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
27
28using namespace llvm;
30
31#define DEBUG_TYPE "amdgpu-resource-usage"
32
36
37
38
39
41 "amdgpu-assume-external-call-stack-size",
42 cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
44
46 "amdgpu-assume-dynamic-stack-object-size",
47 cl::desc("Assumed extra stack use if there are any "
48 "variable sized objects (in bytes)"),
50
52 "Function register usage analysis", true, true)
53
55 if (Op.isImm()) {
56 assert(Op.getImm() == 0);
57 return nullptr;
58 }
59 return cast(Op.getGlobal()->stripPointerCastsAndAliases());
60}
61
65 if (!UseOp.isImplicit() || .isFLAT(*UseOp.getParent()))
66 return true;
67 }
68
69 return false;
70}
71
// NOTE(review): fragment of the legacy-pass-manager runOnMachineFunction
// (definition at original line 72 per the index below). The scrape dropped
// the signature and the lines numbered 72-74, 78-79, 84-89, 91 and 95, so the
// condition guarding the two "= 0" overrides and the statement that receives
// the analyzeResourceUsage(...) result are not visible here.
75 if (!TPC)
76 return false;
77
80
81
82
// Start from the command-line-configurable stack-size assumptions (see the
// cl::opt declarations above); the dropped branch forces them to 0.
83 uint32_t AssumedStackSizeForDynamicSizeObjects =
90 AssumedStackSizeForDynamicSizeObjects = 0;
92 AssumedStackSizeForExternalCall = 0;
93 }
94
// Dropped line 95 presumably begins the call that stores the result of
// analyzeResourceUsage(MF, ...) — TODO confirm against the original file.
96 MF, AssumedStackSizeForDynamicSizeObjects,
97 AssumedStackSizeForExternalCall);
98
// Analysis passes do not modify the IR, so always return false.
99 return false;
100}
101
102AnalysisKey AMDGPUResourceUsageAnalysis::Key;
// NOTE(review): fragment of the new-pass-manager entry point — per the index
// below its signature is
//   Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
// (definition at original line 104). The scrape dropped lines 104-106,
// 111-116, 118 and 122, so the guard around the "= 0" overrides and the start
// of the return expression are not visible here.
107
108
109
// Same stack-size assumption setup as the legacy runOnMachineFunction above.
110 uint32_t AssumedStackSizeForDynamicSizeObjects =
117 AssumedStackSizeForDynamicSizeObjects = 0;
119 AssumedStackSizeForExternalCall = 0;
120 }
121
// Dropped line 122 presumably begins `return ...analyzeResourceUsage(` —
// TODO confirm against the original file.
123 MF, AssumedStackSizeForDynamicSizeObjects,
124 AssumedStackSizeForExternalCall);
125}
126
// analyzeResourceUsage: scans one MachineFunction and fills in a
// SIFunctionResourceInfo — flat-scratch and VCC usage, private segment
// (stack) size, explicit SGPR/VGPR/AGPR counts, the callee list and
// indirect-call/recursion flags. Per the index below its signature is:
//   SIFunctionResourceInfo analyzeResourceUsage(
//       const MachineFunction &MF,
//       uint32_t AssumedStackSizeForDynamicSizeObjects,
//       uint32_t AssumedStackSizeForExternalCall) const
// NOTE(review): this is an HTML-scrape fragment. Many doxygen-numbered lines
// are missing (local setup of Info, MRI, TRI, FrameInfo, ST, TII among them)
// and a few tokens were eaten by the scraper; gaps are flagged inline.
130 uint32_t AssumedStackSizeForExternalCall) const {
132
139
// Flat scratch counts as used when either half of the FLAT_SCR pair is
// touched; additional disjuncts were on the dropped line(s) 142-143.
140 Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
141 MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
144
146
147
148
149
150
151
152
// Dropped lines 153-156 presumably open the condition under which the
// flat-scratch bit is retracted — TODO confirm against the original file.
157 Info.UsesFlatScratch = false;
158 }
159
// Private segment starts at the function's static stack frame size.
160 Info.PrivateSegmentSize = FrameInfo.getStackSize();
161
162
// Variable-sized stack objects have unknown size; add the configured
// assumption on top of the static frame size.
163 Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
164 if (Info.HasDynamicallySizedStack)
165 Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
166
// (Guarded by a dropped line 167.) Account for realignment padding.
168 Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
169
170 Info.UsesVCC =
171 MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
// Count used physical SGPRs; AGPRs only exist when MAI instructions do.
172 Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass,
173 false);
174 if (ST.hasMAIInsts())
175 Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass,
176 false);
177
178
179
180
// Fast path: with no calls (tail calls included) the register-usage query
// alone is enough — no per-instruction scan needed.
181 if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
182 Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass,
183 false);
184 return Info;
185 }
186
// Slow path: track the highest VGPR index seen across all operands.
187 int32_t MaxVGPR = -1;
188 Info.CalleeSegmentSize = 0;
189
// Dropped lines 190-191 presumably open the block/instruction loops that
// bind MI — TODO confirm against the original file.
192 for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
194
196 continue;
197
// Special registers that must not contribute to the VGPR count.
199 switch (Reg) {
200 case AMDGPU::NoRegister:
202 "Instruction uses invalid noreg register");
203 continue;
204
205 case AMDGPU::XNACK_MASK:
206 case AMDGPU::XNACK_MASK_LO:
207 case AMDGPU::XNACK_MASK_HI:
209
210 case AMDGPU::LDS_DIRECT:
212
// Trap-handler registers are reserved; seeing one is a compiler bug.
213 case AMDGPU::TBA:
214 case AMDGPU::TBA_LO:
215 case AMDGPU::TBA_HI:
216 case AMDGPU::TMA:
217 case AMDGPU::TMA_LO:
218 case AMDGPU::TMA_HI:
219 llvm_unreachable("trap handler registers should not be used");
220
221 case AMDGPU::SRC_VCCZ:
223
224 case AMDGPU::SRC_EXECZ:
226
227 case AMDGPU::SRC_SCC:
229
230 default:
231 break;
232 }
233
// Sanity-check that the register class is one the analysis understands
// (VGPR/SGPR/AGPR or one of the TTMP classes).
235 assert((!RC || TRI.isVGPRClass(RC) || TRI.isSGPRClass(RC) ||
236 TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
237 AMDGPU::TTMP_64RegClass.contains(Reg) ||
238 AMDGPU::TTMP_128RegClass.contains(Reg) ||
239 AMDGPU::TTMP_256RegClass.contains(Reg) ||
240 AMDGPU::TTMP_512RegClass.contains(Reg)) &&
241 "Unknown register class");
242
// NOTE(review): the scraper ate a token before ".isVGPRClass" — the guard is
// presumably `!TRI.isVGPRClass(RC)` (only VGPRs matter past this point).
243 if (!RC || .isVGPRClass(RC))
244 continue;
245
246 if (MI.isCall() || MI.isMetaInstruction())
247 continue;
248
// A register of width W 32-bit lanes starting at HW index H occupies
// VGPRs H..H+W-1; track the highest occupied index.
249 unsigned Width = divideCeil(TRI.getRegSizeInBits(*RC), 32);
250 unsigned HWReg = TRI.getHWRegIndex(Reg);
251 int MaxUsed = HWReg + Width - 1;
252 MaxVGPR = std::max(MaxUsed, MaxVGPR);
253 }
254
// Call handling: record callees and conservatively account for callees the
// analysis cannot see.
255 if (MI.isCall()) {
256
257
258
// Dropped line 259 presumably declares CalleeOp — TODO confirm.
260 TII->getNamedOperand(MI, AMDGPU::OpName::callee);
261
262 const Function *Callee = getCalleeFunction(*CalleeOp);
263
// Dropped lines 264-265 presumably define the isSameFunction lambda
// closed by this brace — TODO confirm.
266 };
267
268 if (Callee && !isSameFunction(MF, Callee))
269 Info.Callees.push_back(Callee);
270
// Unknown or declaration-only callees are treated as indirect.
271 bool IsIndirect = !Callee || Callee->isDeclaration();
272
273
// Possible recursion (callee unknown or not marked norecurse):
274 if (!Callee || !Callee->doesNotRecurse()) {
275 Info.HasRecursion = true;
276
277
278
// NOTE(review): the scraper ate the object before ".isReturn()" —
// presumably MI (excluding tail-call returns) — TODO confirm.
279 if (.isReturn()) {
280
281
282
283
284
285
// Assume the externally-configured stack size for the unseen callee.
286 Info.CalleeSegmentSize = std::max(
287 Info.CalleeSegmentSize,
288 static_cast<uint64_t>(AssumedStackSizeForExternalCall));
289 }
290 }
291
292 if (IsIndirect) {
293 Info.CalleeSegmentSize =
294 std::max(Info.CalleeSegmentSize,
295 static_cast<uint64_t>(AssumedStackSizeForExternalCall));
296
297
// Worst-case assumptions for a callee the analysis cannot inspect.
298 Info.UsesVCC = true;
299 Info.UsesFlatScratch = ST.hasFlatAddressSpace();
300 Info.HasDynamicallySizedStack = true;
301 Info.HasIndirectCall = true;
302 }
303 }
304 }
305 }
306
// MaxVGPR is the highest 0-based index seen (-1 when none), so the count is
// MaxVGPR + 1.
307 Info.NumVGPR = MaxVGPR + 1;
308
309 return Info;
310}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static cl::opt< uint32_t > clAssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
Definition AMDGPUResourceUsageAnalysis.cpp:62
static cl::opt< uint32_t > clAssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
Analyzes how many registers and other resources are used by functions.
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
uint32_t getNumNamedBarriers() const
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition AMDGPUResourceUsageAnalysis.cpp:104
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo Result
bool hasFlatScratchInit() const
Module * getParent()
Get the module that this global value is contained inside of...
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable() - Subclasses use this function to get analysis information that may already be available, without requiring it.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
bool isStackRealigned() const
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Primary interface to the complete machine description for the target machine.
const MCSubtargetInfo * getMCSubtargetInfo() const
OSType getOS() const
Get the parsed operating system type of this triple.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAMDHSACodeObjectVersion(const Module &M)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUResourceUsageAnalysisID
Definition AMDGPUResourceUsageAnalysis.cpp:34
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects, uint32_t AssumedStackSizeForExternalCall) const
Definition AMDGPUResourceUsageAnalysis.cpp:128
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
Definition AMDGPUResourceUsageAnalysis.cpp:72
FunctionResourceInfo ResourceInfo
A special type used by analysis passes to provide an address that identifies that particular analysis...