LLVM: lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

21

23

27 case AMDGPU::S_WAITCNT:

28 case AMDGPU::S_WAITCNT_soft:

29 case AMDGPU::S_WAITCNT_EXPCNT:

30 case AMDGPU::S_WAITCNT_LGKMCNT:

31 case AMDGPU::S_WAITCNT_VMCNT:

32 case AMDGPU::S_WAITCNT_VSCNT:

33 case AMDGPU::S_WAITCNT_VSCNT_soft:

34 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:

35 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:

36 case AMDGPU::S_WAITCNT_VMCNT_gfx10:

37 case AMDGPU::S_WAITCNT_VSCNT_gfx10:

38 case AMDGPU::S_WAITCNT_gfx10:

39 case AMDGPU::S_WAITCNT_gfx6_gfx7:

40 case AMDGPU::S_WAITCNT_vi:

41 return processWaitCnt(Inst, MCI);

42 }

43}

44

45

46

47void AMDGPUInstrPostProcess::processWaitCnt(Instruction &Inst,

49 for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {

52 if (MCOp.isReg()) {

54 } else if (MCOp.isImm()) {

56 }

57 Op.setIndex(Idx);

59 }

60}

61

66 generateWaitCntInfo();

67}

68

72 unsigned Opcode = Inst.getOpcode();

73

74

75

76

77

78 switch (Opcode) {

79 default:

80 return 0;

81 case AMDGPU::S_WAITCNT:

82 case AMDGPU::S_WAITCNT_soft:

83 case AMDGPU::S_WAITCNT_EXPCNT:

84 case AMDGPU::S_WAITCNT_LGKMCNT:

85 case AMDGPU::S_WAITCNT_VMCNT:

86 case AMDGPU::S_WAITCNT_VSCNT:

87 case AMDGPU::S_WAITCNT_VSCNT_soft:

88 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:

89 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:

90 case AMDGPU::S_WAITCNT_VMCNT_gfx10:

91 case AMDGPU::S_WAITCNT_VSCNT_gfx10:

92 case AMDGPU::S_WAITCNT_gfx10:

93 case AMDGPU::S_WAITCNT_gfx6_gfx7:

94 case AMDGPU::S_WAITCNT_vi:

95

96

97

98

99 return handleWaitCnt(IssuedInst, IR);

100 }

101

102 return 0;

103}

104

105unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef IssuedInst,

107

108

109

110 unsigned Vmcnt = 63;

111 unsigned Expcnt = 7;

112 unsigned Lgkmcnt = 31;

113 unsigned Vscnt = 63;

114 unsigned CurrVmcnt = 0;

115 unsigned CurrExpcnt = 0;

116 unsigned CurrLgkmcnt = 0;

117 unsigned CurrVscnt = 0;

118 unsigned CyclesToWaitVm = ~0U;

119 unsigned CyclesToWaitExp = ~0U;

120 unsigned CyclesToWaitLgkm = ~0U;

121 unsigned CyclesToWaitVs = ~0U;

122

123 computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);

124

125

126

127 for (const InstRef &PrevIR : IssuedInst) {

128 const Instruction &PrevInst = *PrevIR.getInstruction();

129 const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();

130 const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];

133 "We should know how many cycles are left for this instruction");

134 if (PrevInstWaitInfo.VmCnt) {

135 CurrVmcnt++;

136 if ((unsigned)CyclesLeft < CyclesToWaitVm)

137 CyclesToWaitVm = CyclesLeft;

138 }

139 if (PrevInstWaitInfo.ExpCnt) {

140 CurrExpcnt++;

141 if ((unsigned)CyclesLeft < CyclesToWaitExp)

142 CyclesToWaitExp = CyclesLeft;

143 }

144 if (PrevInstWaitInfo.LgkmCnt) {

145 CurrLgkmcnt++;

146 if ((unsigned)CyclesLeft < CyclesToWaitLgkm)

147 CyclesToWaitLgkm = CyclesLeft;

148 }

149 if (PrevInstWaitInfo.VsCnt) {

150 CurrVscnt++;

151 if ((unsigned)CyclesLeft < CyclesToWaitVs)

152 CyclesToWaitVs = CyclesLeft;

153 }

154 }

155

156 unsigned CyclesToWait = ~0U;

157 if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)

158 CyclesToWait = CyclesToWaitVm;

159 if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)

160 CyclesToWait = CyclesToWaitExp;

161 if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)

162 CyclesToWait = CyclesToWaitLgkm;

163 if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)

164 CyclesToWait = CyclesToWaitVs;

165

166

167

168

169

170

171

172 if (CyclesToWait == ~0U)

173 return 0;

174 return CyclesToWait;

175}

176

177void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,

178 unsigned &Expcnt, unsigned &Lgkmcnt,

179 unsigned &Vscnt) {

182 unsigned Opcode = Inst.getOpcode();

183

184 switch (Opcode) {

185 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:

186 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:

187 case AMDGPU::S_WAITCNT_VMCNT_gfx10:

188 case AMDGPU::S_WAITCNT_VSCNT_gfx10: {

189

190

191

192 const MCAOperand *OpReg = Inst.getOperand(0);

193 const MCAOperand *OpImm = Inst.getOperand(1);

194 assert(OpReg && OpReg->isReg() && "First operand should be a register.");

195 assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");

196 if (OpReg->getReg() != AMDGPU::SGPR_NULL) {

197

198

199

201 << MCII.getName(Opcode) << " will be completely "

202 << "ignored. So the wait may not be accurate.\n";

203 }

204 switch (Opcode) {

205

206

207

208 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:

209 Expcnt = OpImm->getImm();

210 break;

211 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:

212 Lgkmcnt = OpImm->getImm();

213 break;

214 case AMDGPU::S_WAITCNT_VMCNT_gfx10:

215 Vmcnt = OpImm->getImm();

216 break;

217 case AMDGPU::S_WAITCNT_VSCNT_gfx10:

218 Vscnt = OpImm->getImm();

219 break;

220 }

221 return;

222 }

223 case AMDGPU::S_WAITCNT_gfx10:

224 case AMDGPU::S_WAITCNT_gfx6_gfx7:

225 case AMDGPU::S_WAITCNT_vi:

226 unsigned WaitCnt = Inst.getOperand(0)->getImm();

228 return;

229 }

230}

231

232void AMDGPUCustomBehaviour::generateWaitCntInfo() {

233

234

235

236

237

238

239

240

241

242

244 InstrWaitCntInfo.resize(SrcMgr.size());

245

247 const std::unique_ptr &Inst = EN.value();

248 unsigned Index = EN.index();

249 unsigned Opcode = Inst->getOpcode();

250 const MCInstrDesc &MCID = MCII.get(Opcode);

253 InstrWaitCntInfo[Index].LgkmCnt = true;

254 if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))

255 InstrWaitCntInfo[Index].ExpCnt = true;

257

258

259

260

261 InstrWaitCntInfo[Index].LgkmCnt = true;

262 if (STI.hasFeature(AMDGPU::FeatureVscnt))

263 InstrWaitCntInfo[Index].VmCnt = true;

265 InstrWaitCntInfo[Index].VmCnt = true;

266 else

267 InstrWaitCntInfo[Index].VsCnt = true;

269 if (STI.hasFeature(AMDGPU::FeatureVscnt))

270 InstrWaitCntInfo[Index].VmCnt = true;

271 else if ((MCID.mayLoad() &&

275 InstrWaitCntInfo[Index].VmCnt = true;

277 InstrWaitCntInfo[Index].VsCnt = true;

278

279

280

281

282

283 if (IV.Major < 7 &&

285 InstrWaitCntInfo[Index].ExpCnt = true;

287 InstrWaitCntInfo[Index].LgkmCnt = true;

289 InstrWaitCntInfo[Index].ExpCnt = true;

290 } else {

291 switch (Opcode) {

292 case AMDGPU::S_SENDMSG:

293 case AMDGPU::S_SENDMSGHALT:

294 case AMDGPU::S_MEMTIME:

295 case AMDGPU::S_MEMREALTIME:

296 InstrWaitCntInfo[Index].LgkmCnt = true;

297 break;

298 }

299 }

300 }

301}

302

303

304bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {

308}

309

310

311bool AMDGPUCustomBehaviour::hasModifiersSet(

312 const std::unique_ptr &Inst, AMDGPU::OpName OpName) const {

313 int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);

314 if (Idx == -1)

315 return false;

316

317 const MCAOperand *Op = Inst->getOperand(Idx);

318 if (Op == nullptr || Op->isImm() || Op->getImm())

319 return false;

320

321 return true;

322}

323

324

325bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const {

326 const MCInstrDesc &MCID = MCII.get(Opcode);

328}

329

330

331bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {

332 return Opcode == AMDGPU::DS_ORDERED_COUNT ||

333 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||

334 Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

335}

336

337}

338

339using namespace llvm;

340using namespace mca;

341

342static CustomBehaviour *

348

349static InstrPostProcess *

354

355

356

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static CustomBehaviour * createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)

Definition AMDGPUCustomBehaviour.cpp:343

LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA()

Extern function to initialize the targets for the AMDGPU backend.

Definition AMDGPUCustomBehaviour.cpp:358

static InstrPostProcess * createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)

Definition AMDGPUCustomBehaviour.cpp:350

This file defines the AMDGPUCustomBehaviour class which inherits from CustomBehaviour.

Provides AMDGPU specific target descriptions.

#define LLVM_EXTERNAL_VISIBILITY

Legalize the Machine IR a function s Machine IR

static const uint32_t IV[8]

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

Instances of this class represent a single low-level machine instruction.

unsigned getOpcode() const

const MCOperand & getOperand(unsigned i) const

bool mayStore() const

Return true if this instruction could possibly modify memory.

bool mayLoad() const

Return true if this instruction could possibly read memory.

Interface to description of machine instruction set.

Instances of this class represent operands of the MCInst class.

MCRegister getReg() const

Returns the register number.

Generic base class for all target subtargets.

Value * getOperand(unsigned i) const

static LLVM_ABI raw_ostream & warning()

Convenience method for printing "warning: " to stderr.

unsigned checkCustomHazard(ArrayRef< InstRef > IssuedInst, const InstRef &IR) override

This method is used to determine if an instruction should be allowed to be dispatched.

Definition AMDGPUCustomBehaviour.cpp:69

AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)

Definition AMDGPUCustomBehaviour.cpp:62

void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override

This method can be overridden by targets to modify the mca::Instruction object after it has been lower...

Definition AMDGPUCustomBehaviour.cpp:24

const mca::SourceMgr & SrcMgr

const MCSubtargetInfo & STI

CustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)

An InstRef contains both a SourceMgr index and Instruction pair.

unsigned getOpcode() const

void addOperand(const MCAOperand Op)

An instruction propagated through the simulated instruction pipeline.

int getCyclesLeft() const

A representation of an mca::Instruction operand for use in mca::CustomBehaviour.

unsigned getReg() const

Returns the register number.

static MCAOperand createImm(int64_t Val)

static MCAOperand createReg(unsigned Reg)

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)

Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...

LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)

bool getMUBUFIsBufferInv(unsigned Opc)

constexpr int UNKNOWN_CYCLES

friend class Instruction

Iterator for Instructions in a BasicBlock.

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

Target & getTheR600Target()

The target for R600 GPUs.

Target & getTheGCNTarget()

The target for GCN GPUs.

DWARFExpression::Operation Op

static void RegisterInstrPostProcess(Target &T, Target::InstrPostProcessCtorTy Fn)

RegisterInstrPostProcess - Register an InstrPostProcess implementation for the given target.

static void RegisterCustomBehaviour(Target &T, Target::CustomBehaviourCtorTy Fn)

RegisterCustomBehaviour - Register a CustomBehaviour implementation for the given target.

Abstracting the input code sequence (a sequence of MCInst) and assigning unique identifiers to every ...