LLVM: lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp Source File

//===- AMDGPUResourceUsageAnalysis.cpp - analysis of resources -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-resource-usage"

// Assumed stack sizes used when the true stack use of an external call or of
// dynamically sized objects cannot be determined statically.
static cl::opt<uint32_t> clAssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

INITIALIZE_PASS(AMDGPUResourceUsageAnalysisWrapperPass, DEBUG_TYPE,
                "Function register usage analysis", true, true)
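// Illustrative note (not part of the original source): both options above are
// ordinary cl::opt flags, so they can be overridden when the backend is run
// through llc, for example:
//
//   llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 \
//       -amdgpu-assume-external-call-stack-size=8192 \
//       -amdgpu-assume-dynamic-stack-object-size=1024 input.ll
//
// (-mcpu and input.ll are placeholders.) As the entry points below show, for
// code object v5 and later, and for AMDPAL, the assumed sizes are treated as
// zero unless the flags are passed explicitly.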

static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  return cast<Function>(Op.getGlobal()->stripPointerCastsAndAliases());
}

static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}

bool AMDGPUResourceUsageAnalysisWrapperPass::runOnMachineFunction(
    MachineFunction &MF) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();

  // By default, for code object v5 and later (and for AMDPAL), track only the
  // minimum scratch size; the assumed sizes apply only when the flags above
  // were set explicitly.
  uint32_t AssumedStackSizeForDynamicSizeObjects =
      clAssumedStackSizeForDynamicSizeObjects;
  uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
  if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
          AMDGPU::AMDHSA_COV5 ||
      STI.getTargetTriple().getOS() == Triple::AMDPAL) {
    if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
      AssumedStackSizeForDynamicSizeObjects = 0;
    if (!clAssumedStackSizeForExternalCall.getNumOccurrences())
      AssumedStackSizeForExternalCall = 0;
  }

  ResourceInfo = AMDGPUResourceUsageAnalysisImpl().analyzeResourceUsage(
      MF, AssumedStackSizeForDynamicSizeObjects,
      AssumedStackSizeForExternalCall);

  return false;
}

AnalysisKey AMDGPUResourceUsageAnalysis::Key;

AMDGPUResourceUsageAnalysis::Result
AMDGPUResourceUsageAnalysis::run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM) {
  const MCSubtargetInfo &STI = MF.getSubtarget();

  // Same defaulting of the assumed stack sizes as in runOnMachineFunction
  // above.
  uint32_t AssumedStackSizeForDynamicSizeObjects =
      clAssumedStackSizeForDynamicSizeObjects;
  uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
  if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
          AMDGPU::AMDHSA_COV5 ||
      STI.getTargetTriple().getOS() == Triple::AMDPAL) {
    if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
      AssumedStackSizeForDynamicSizeObjects = 0;
    if (!clAssumedStackSizeForExternalCall.getNumOccurrences())
      AssumedStackSizeForExternalCall = 0;
  }

  return AMDGPUResourceUsageAnalysisImpl().analyzeResourceUsage(
      MF, AssumedStackSizeForDynamicSizeObjects,
      AssumedStackSizeForExternalCall);
}

AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
    const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects,
    uint32_t AssumedStackSizeForExternalCall) const {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MFI->getUserSGPRInfo().hasFlatScratchInit();

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly
  // may need it, though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
  Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass,
                                                false);
  if (ST.hasMAIInsts())
    Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass,
                                          false);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass,
                                          false);
    return Info;
  }

  int32_t MaxVGPR = -1;
  Info.CalleeSegmentSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
        const MachineOperand &MO = MI.getOperand(I);

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

        const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(Reg);
        assert((!RC || TRI.isVGPRClass(RC) || TRI.isSGPRClass(RC) ||
                TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
                AMDGPU::TTMP_64RegClass.contains(Reg) ||
                AMDGPU::TTMP_128RegClass.contains(Reg) ||
                AMDGPU::TTMP_256RegClass.contains(Reg) ||
                AMDGPU::TTMP_512RegClass.contains(Reg)) &&
               "Unknown register class");

        // Only VGPR pressure is tracked per operand here; SGPR and AGPR
        // counts were taken from MachineRegisterInfo above.
        if (!RC || !TRI.isVGPRClass(RC))
          continue;

        if (MI.isCall() || MI.isMetaInstruction())
          continue;

        unsigned Width = divideCeil(TRI.getRegSizeInBits(*RC), 32);
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        MaxVGPR = std::max(MaxUsed, MaxVGPR);
      }

      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);

        auto isSameFunction = [](const MachineFunction &MF, const Function *F) {
          return F == &MF.getFunction();
        };

        if (Callee && !isSameFunction(MF, Callee))
          Info.Callees.push_back(Callee);

        bool IsIndirect = !Callee || Callee->isDeclaration();

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callee, we don't need to assume the external call stack size.
          if (MI.isReturn()) {
            // A potentially recursive tail call: assume the externally
            // visible worst-case stack size.
            Info.CalleeSegmentSize = std::max(
                Info.CalleeSegmentSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect) {
          Info.CalleeSegmentSize =
              std::max(Info.CalleeSegmentSize,
                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          // Conservatively assume the worst for an unknown indirect callee.
          Info.UsesVCC = true;
          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        }
      }
    }
  }

  Info.NumVGPR = MaxVGPR + 1;

  return Info;
}
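The VGPR count computed above follows a simple rule: each register operand covers Width = ceil(RegSizeInBits / 32) consecutive 32-bit registers starting at its hardware index, the loop tracks the highest index touched, and NumVGPR is that index plus one. The following standalone sketch shows the same counting logic in plain C++; it uses no LLVM types, and the register tuples are made-up illustrative values, not taken from the file.

#include <algorithm>
#include <cstdio>
#include <vector>

// One entry per register operand: hardware register index and size in bits,
// mirroring TRI.getHWRegIndex() and TRI.getRegSizeInBits() in the pass above.
struct RegUse {
  unsigned HWReg;
  unsigned SizeInBits;
};

static unsigned countVGPRs(const std::vector<RegUse> &Uses) {
  int MaxVGPR = -1; // -1 means "no VGPR seen", so the final count becomes 0.
  for (const RegUse &U : Uses) {
    unsigned Width = (U.SizeInBits + 31) / 32; // divideCeil(SizeInBits, 32)
    int MaxUsed = static_cast<int>(U.HWReg + Width - 1);
    MaxVGPR = std::max(MaxVGPR, MaxUsed);
  }
  return static_cast<unsigned>(MaxVGPR + 1);
}

int main() {
  // Hypothetical operands: a 64-bit pair at v[2:3] and a 32-bit use of v5.
  // The highest register touched is v5, so the function needs 6 VGPRs.
  std::vector<RegUse> Uses = {{2, 64}, {5, 32}};
  std::printf("NumVGPR = %u\n", countVGPRs(Uses));
  return 0;
}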
