LLVM: lib/Target/AMDGPU/R600MachineScheduler.cpp Source File

//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// R600 Machine Scheduler interface
//
//===----------------------------------------------------------------------===//

#include "R600MachineScheduler.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Subtarget.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
  DAG = static_cast<ScheduleDAGMILive *>(dag);
  const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>();
  TII = static_cast<const R600InstrInfo *>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo *>(DAG->TRI);
  VLIW5 = !ST.hasCaymanISA();
  MRI = &DAG->MRI;
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupiedSlotsMask = 31;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
  AluInstCount = 0;
  FetchInstCount = 0;
}
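
// NB: OccupiedSlotsMask models the five issue slots of an R600 VLIW group:
// bits 0-3 are the X/Y/Z/W vector slots and bit 4 is the Trans slot (compare
// the masks 15, 16 and 31 used in pickAlu below). Initializing it to 31 marks
// every slot occupied, so the first ALU pick starts a fresh group.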

void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst)
{
  llvm::append_range(QDst, QSrc);
  QSrc.clear();
}

static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert(GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
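
// NB: 248 is the per-SIMD GPR budget this heuristic assumes, presumably the
// 256-entry register file of R600-class hardware minus a few reserved
// entries; dividing it by the GPRs one wavefront needs gives how many
// wavefronts can be resident at once.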

SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = nullptr;
  NextInstKind = IDOther;

  IsTopNode = false;

  // Check if we might want to switch current clause type.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());

  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    // We use the heuristic provided by the AMD Accelerated Parallel
    // Processing OpenCL Programming Guide:
    // the approx. number of WF that allows TEX inst to hide ALU inst is
    // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
    float ALUFetchRationEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    if (ALUFetchRationEstimate == 0) {
      AllowSwitchFromAlu = true;
    } else {
      unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
      LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
      // We assume the local GPR requirements to be "dominated" by the
      // requirement of the TEX clause (which consumes 128-bit regs); ALU
      // instructions before and after TEX are indeed likely to consume or
      // generate values from/for the TEX clause.
      // Available[IDFetch].size() * 2: GPRs required in the Fetch clause.
      // We assume that fetch instructions are either TnXYZW = TEX TnXYZW
      // (need one GPR) or TmXYZW = TnXYZW (need 2 GPRs).
      // (TODO: use RegisterPressure.)
      // If we are going to use too many GPRs, we flush Fetch instructions to
      // lower register pressure on 128-bit regs.
      unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
      if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
        AllowSwitchFromAlu = true;
    }
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick ALU.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick FETCH.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick other.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  LLVM_DEBUG(if (SU) {
    dbgs() << " ** Pick node **\n";
    DAG->dumpNode(*SU);
  } else {
    dbgs() << "NO NODE \n";
    for (const SUnit &S : DAG->SUnits)
      if (!S.isScheduled)
        DAG->dumpNode(S);
  });

  return SU;
}
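
// NB: pickNode always schedules bottom-up (IsTopNode is set to false
// unconditionally). The clause logic above prefers staying in an ALU clause
// until its limit is reached, then falls back to fetch and finally to
// "other" instructions; deferred physical-register copies are emitted only
// when no regular ALU candidate fits the current group.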

void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupiedSlotsMask |= 31;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    AluInstCount++;
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
           E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  } else
    FetchInstCount++;
}

static bool
isPhysicalRegCopy(MachineInstr *MI) {
  if (MI->getOpcode() != R600::COPY)
    return false;

  return !MI->getOperand(1).getReg().isVirtual();
}
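
// NB: operand 1 of a COPY is its source, so this matches copies *from* a
// physical register. releaseBottomNode queues such copies separately and
// pickNode drains them only when pickAlu has nothing else to offer, which
// keeps them from breaking up instruction groups.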

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU));
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // There is no export clause; we can schedule one as soon as it is ready.
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}

bool R600SchedStrategy::regBelongsToClass(Register Reg,
                                          const TargetRegisterClass *RC) const {
  if (!Reg.isVirtual())
    return RC->contains(Reg);
  return MRI->getRegClass(Reg) == RC;
}

R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  if (TII->isTransOnly(*MI))
    return AluTrans;

  switch (MI->getOpcode()) {
  case R600::PRED_X:
    return AluPredX;
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
  case R600::DOT_4:
    return AluT_XYZW;
  case R600::COPY:
    if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL; don't consider it in scheduling.
      return AluDiscarded;
    }
    break;
  default:
    break;
  }

  // Does the instruction take a whole IG?
  // XXX: Is it possible to add a helper to TII that returns true for DOT4
  // and vector-only instructions?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()) ||
      MI->getOpcode() == R600::GROUP_BARRIER) {
    return AluT_XYZW;
  }

  if (TII->isLDSInstr(MI->getOpcode())) {
    return AluT_X;
  }

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case R600::sub0:
    return AluT_X;
  case R600::sub1:
    return AluT_Y;
  case R600::sub2:
    return AluT_Z;
  case R600::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  Register DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
    return AluT_XYZW;

  // LDS src registers cannot be used in the Trans slot.
  if (TII->readsLDSSrcReg(*MI))
    return AluT_XYZW;

  return AluAny;
}
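
// NB: the order of the checks above matters: trans-only and whole-group
// instructions are classified first, then a channel is pinned whenever the
// destination subregister or register class already implies one; only an
// instruction with no such constraint stays AluAny, free to fill whichever
// slot AttemptFillSlot asks for.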

int R600SchedStrategy::getInstKind(SUnit *SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case R600::PRED_X:
  case R600::COPY:
  case R600::CONST_COPY:
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
  case R600::DOT_4:
    return IDAlu;
  default:
    return IDOther;
  }
}

SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
  if (Q.empty())
    return nullptr;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
        (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    }
    InstructionsGroupCandidate.pop_back();
  }
  return nullptr;
}
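
// NB: PopInst walks Q from the back, i.e. it prefers the most recently
// released SUnit, and only returns a candidate whose addition keeps the
// group within the hardware's constant-read limits. With AnyALU set (used
// when filling the Trans slot) vector-only instructions are rejected too.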

void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (SUnit *SU : QSrc) {
    AluKind AK = getAluKind(SU);
    AvailableAlus[AK].push_back(SU);
  }
  QSrc.clear();
}

void R600SchedStrategy::PrepareNextSlot() {
  LLVM_DEBUG(dbgs() << "New Slot\n");
  assert(OccupiedSlotsMask && "Slot wasn't filled");
  OccupiedSlotsMask = 0;
//  if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
//    OccupiedSlotsMask |= 16;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

void R600SchedStrategy::AssignSlot(MachineInstr *MI, unsigned Slot) {
  int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
  if (DstIndex == -1) {
    return;
  }
  Register DestReg = MI->getOperand(DstIndex).getReg();
  // PressureRegister crashes if an operand is def and used in the same inst
  // and we try to constrain its regclass.
  for (const MachineOperand &MO : MI->all_uses())
    if (MO.getReg() == DestReg)
      return;
  // Constrains the regclass of DestReg to assign it to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
    break;
  }
}
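
// NB: slots 0-3 correspond to the X/Y/Z/W channels; constraining the
// destination register class is what makes the slot choice binding for the
// register allocator later on.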

SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}

unsigned R600SchedStrategy::AvailablesAluCount() const {
  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
         AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
         AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
         AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
         AvailableAlus[AluPredX].size();
}

SUnit* R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupiedSlotsMask) {
      // Bottom-up scheduling: PRED_X must come first.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupiedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
      }
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupiedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
      }
      // If there is a T_XYZW alu available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupiedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
      }
    }
    bool TransSlotOccupied = OccupiedSlotsMask & 16;
    if (!TransSlotOccupied && VLIW5) {
      if (!AvailableAlus[AluTrans].empty()) {
        OccupiedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
      }
      SUnit *SU = AttemptFillSlot(3, true);
      if (SU) {
        OccupiedSlotsMask |= 16;
        return SU;
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupiedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan, false);
        if (SU) {
          OccupiedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return nullptr;
}
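
// NB: inside a group the vector slots are filled from W down to X, after
// PRED_X, discarded copies and whole-group XYZW instructions have had first
// claim and, on VLIW5 parts, the Trans slot has been tried.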

SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = nullptr;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.pop_back();
  }
  return SU;
}
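
// Usage sketch: the strategy is not self-registering. The R600 target hands
// it to the generic machine scheduler when building the scheduling DAG,
// roughly as below (an approximation; the actual factory lives in the
// target-machine setup code, not in this file):
//
//   static ScheduleDAGInstrs *
//   createR600MachineScheduler(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
//   }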
