LLVM: lib/Target/AMDGPU/GCNSchedStrategy.cpp Source File

//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
// ...

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
}
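// Usage note (not part of the original source): the cl::opt flags above are
// ordinary LLVM command-line options, so they can be passed to llc directly
// or through a frontend via -mllvm. A hypothetical invocation, with flag
// values chosen purely for illustration:
//   llc -mtriple=amdgcn -mcpu=gfx90a \
//       -amdgpu-disable-unclustered-high-rp-reschedule \
//       -amdgpu-schedule-metric-bias=50 kernel.ll -o kernel.s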

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // ... (SGPR/VGPR excess limits, target occupancy, and the SGPR critical
  // limit are computed here)
  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // Compute a small, allocation-granule-aligned VGPR budget instead, so
    // that targets with large VGPR files do not get an over-optimistic limit.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");
    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  }

  // ... (the limits are then reduced by the configured bias/error margins)
}

/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  // Cannot use pressure diffs for subregister defs or with physregs, it's
  // imprecise in both cases.
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }
  return true;
}

static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
  if (!GCNTrackers) {
    AtTop
        ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
        : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
    return;
  }

  // GCNTrackers
  Pressure.resize(4, 0);
  MachineInstr *MI = SU->getInstr();
  GCNRegPressure NewPressure;
  if (AtTop) {
    GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
    NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);
  } else {
    GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
    TempUpwardTracker.recede(*MI);
    NewPressure = TempUpwardTracker.getPressure();
  }
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
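// Reading note (explanatory, not from the source): the GCN-tracker path above
// probes pressure on local copies of the trackers, so querying a candidate
// never advances the real DownwardTracker/UpwardTracker state; those are only
// advanced in GCNSchedStrategy::schedNode() once a node is actually picked.
// The generic path instead relies on getDownwardPressure()/getUpwardPressure()
// undoing their temporary changes to the shared RegPressureTracker.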

static bool isIGLPMutationOnly(unsigned Opcode) {
  return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
}

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  Pressure.clear();
  MaxPressure.clear();

  // We try to use the cached PressureDiffs in the ScheduleDAG whenever
  // possible instead of querying the RegPressureTracker, which issues many
  // expensive LiveIntervals queries.
  if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
  } else {
    // Reserve 4 slots.
    Pressure.resize(4, 0);
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up so if we're working top-down we
      // need to invert its sign.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets by
  // the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so when we
  // report excess/critical register pressure, we do it either only for VGPRs
  // or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When that
  // happens, increasing SGPR and VGPR pressure has the same cost, so there is
  // no need to prefer one over the other.
  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
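// Illustrative arithmetic (values assumed, not from the source): if
// SGPRCriticalLimit = 94, VGPRCriticalLimit = 24, and scheduling a candidate
// would leave NewSGPRPressure = 96 and NewVGPRPressure = 20, then
// SGPRDelta = 2 and VGPRDelta = -4, so CriticalMax records a +2 change on
// SReg_32 and the pressure comparisons in tryCandidate() will penalize this
// candidate against one that stays under the critical limits.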

void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  ReadyQueue &Q = Zone.Available;
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional().
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone
  // and the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
                      /*IsBottomUp=*/true);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
                        /*IsBottomUp=*/true);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top queue has a better candidate.
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
                      /*IsBottomUp=*/false);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
                        /*IsBottomUp=*/false);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick the best candidate from BotCand and TopCand.
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (GCNTrackers) {
    MachineInstr *MI = SU->getInstr();
    IsTopNode ? (void)DownwardTracker.advance(MI, false)
              : UpwardTracker.recede(*MI);
  }

  return GenericScheduler::schedNode(SU, IsTopNode);
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C, bool IsLegacyScheduler)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
  // ...
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}

/// Apply a set of heuristics to a new candidate.
bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}
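// Reading note (explanatory, not from the source): compared with
// GenericScheduler::tryCandidate(), this ILP variant applies tryLatency()
// inside the same-boundary block, ahead of the clustering and
// RegCritical/RegMax pressure heuristics, so a candidate that hides more
// latency can win even if it raises pressure, as long as the hard RegExcess
// limit checked first is not violated.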

GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::MemoryClauseInitialSchedule);
}

/// GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions
/// as much as possible. This is achieved by prioritizing clustered operations
/// and long-latency loads ahead of the usual stall-latency heuristic.
bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  if (DAG->isTrackingPressure()) {
    // Avoid exceeding the target's limit.
    if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                    RegExcess, TRI, DAG->MF))
      return TryCand.Reason != NoCand;

    // Avoid increasing the max critical pressure in the scheduled region.
    if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                    TryCand, Cand, RegCritical, TRI, DAG->MF))
      return TryCand.Reason != NoCand;
  }

  // MaxMemoryClause-specific: prioritize clustered instructions, since
  // clausing these memory instructions is the point of this strategy.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // ... (prioritize instructions that read unbuffered resources by stall
    // cycles)

    // MaxMemoryClause-specific: prioritize long-latency loads so more of
    // their latency can be hidden; mayLoad() excludes store-like
    // instructions, which we do not want to schedule too early.
    bool TryMayLoad =
        TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
    bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();

    if (TryMayLoad || CandMayLoad) {
      bool TryLongLatency =
          TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
      bool CandLongLatency =
          10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;

      if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                     Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                     Cand, Stall))
        return TryCand.Reason != NoCand;
    }
    // ... (weak edges)
  }

  if (SameBoundary) {
    // ... (unconditionally try to reduce latency)
  }

  // ... (avoid increasing the max pressure of the entire region)

  if (SameBoundary) {
    // ... (critical resource consumption and schedule balance)

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }

  return false;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  // ... (advance the tracker across the region's instructions)
  return RPTracker.moveMaxPressure();
}

static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                                        MachineBasicBlock::iterator RegionEnd) {
  auto REnd = RegionEnd == RegionBegin->getParent()->end()
                  ? std::prev(RegionEnd)
                  : RegionEnd;
  return &*skipDebugInstructionsBackward(REnd, RegionBegin);
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse the calculated live set if
  // the successor will be sent to scheduling past the current block.
  //
  // However, due to a bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      // ... (reset the max-pressure accumulator for this region)
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
      // ... (recompute NonDbgMI for the next region up)
    }
    // ... (advance the tracker to the next instruction)
  }

  if (OnlySucc) {
    // ... (advance the tracker to the end of the block)
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    RegionFirstMIs.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}

void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    MachineInstr *RegionKey =
        IsLiveOut
            ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
            : &*DAG->Regions[I].first;
    IdxToInstruction[I] = RegionKey;
  }
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}
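// Reading note (explanatory, not from the source): RescheduleRegions starts
// out all-set, so every region is initially a candidate for the later stages,
// while the RegionsWith* bitvectors start cleared and are populated as
// scheduling discovers high register pressure, excess pressure,
// minimum-occupancy regions, and IGLP instructions.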

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  // ... (install the re-entrant IGroupLP mutation for this stage)

  InitialOccupancy = DAG.MinOccupancy;
  // ... (temporarily bias the SGPR/VGPR limits and raise the occupancy
  // target for this stage)

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  // Rematerialization cannot help if the function is already at the occupancy
  // limit imposed by its resource usage.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // ...

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  // ...
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
      DAG.RegionsWithMinOcc[IDX] =
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.RegionEnd, NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.RegionEnd || DAG.begin() == std::prev(DAG.RegionEnd))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save the original order of instructions before scheduling, for a
  // possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (isIGLPMutationOnly(I.getOpcode()))
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure:  "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  // ... (record whether the region is already known to exceed the pressure
  // limits, and re-install the IGroupLP mutation for regions that contain
  // IGLP instructions)

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions at minimum occupancy that this stage has not
  // already improved, or regions with excess register pressure.
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // Reschedule regions that were found to be testing critical register
  // pressure limits in the earlier stages.
  if (!DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // ... (for initial schedule stages, compute the real register pressure for
  // the whole block up front via computeBlockPressure)
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  advanceRegion();
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(),
      ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()));
  unsigned WavesAfter =
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory-bound functions to drop to lower occupancy if not limited by
  // an attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  // ... (compute the hard SGPR/VGPR limits for the target and mark the region
  // as having excess register pressure if the new schedule exceeds them)

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}
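// Illustrative arithmetic (values assumed, not from the source): with
// TargetOccupancy = 8, PressureBefore yielding 8 waves and PressureAfter
// yielding 6, we get WavesBefore = 8 and WavesAfter = 6. If the function is
// memory bound and MFI.getMinAllowedOccupancy() == 4, the drop is tolerated
// and NewOccupancy becomes 6; otherwise NewOccupancy stays at max(6, 8) = 8
// and shouldRevertScheduling() will typically undo the new schedule.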

unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}
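// Worked example (numbers assumed, not from the source): for an SU with two
// register-dependence predecessors, one ready at cycle 3 with latency 4 and
// one ready at cycle 5 with latency 1, the loop computes
// max(CurrCycle, 3 + 4, 5 + 1), so the SU becomes ready at cycle 7 (or later
// if CurrCycle has already advanced past that point).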

#ifndef NDEBUG
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction          "
            "             "
            "                            \n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
#endif

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}
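// Illustrative arithmetic (values assumed, not from the source): if a
// region's schedule issues its last instruction at CurrCycle = 200 with
// SumBubbles = 25 stall cycles accumulated, the printed metric is
// (25 * 100) / 200 = 12, i.e. bubbles expressed as a scaled fraction of the
// schedule length (ScaleFactor is 100).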

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // If register pressure was not reduced by the unclustered reschedule, keep
  // the old schedule.
  if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
       mayCauseSpilling(WavesAfter)) ||
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  // Do not attempt to relax the schedule even more if we are already spilling.
  if (isRegionWithExcessRP())
    return false;

  LLVM_DEBUG(
      dbgs() << "\n\t      *** In shouldRevertScheduling ***\n"
             << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs() << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}
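// Illustrative arithmetic (values assumed, not from the source): with
// WavesAfter == WavesBefore (occupancy unchanged), OldMetric = 40,
// NewMetric = 50, and the default ScheduleMetricBias of 10,
// Profit = (100 * ((40 + 10) * 100) / 50) / 100 = 100, which is not strictly
// below ScaleFactor (100), so the unclustered schedule is kept; a larger
// latency regression pushes Profit below 100 and triggers a revert.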

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(
    unsigned WavesAfter) {
  return mayCauseSpilling(WavesAfter);
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      S.hasNextStage() &&
      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instructions end up at the end of the
  // block and RegionEnd points to the first of them; step past the skipped
  // debug instructions to reach the real end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() is a debug instruction, the region has effectively
  // shrunk since all debug instructions were moved to the end of the block.
  // Find the first instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and reset
  // RegionBegin/RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization.
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-in to a region at minimum occupancy, i.e. the register must be in
    // the region's live-in set.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with the rematerializable reg as a live-in to avoid
        // searching later when updating register pressure.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}

bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables that we will modify, and install
  // only if sinking succeeds.
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
      NewRegions;
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
  for (const auto &It : RematDefToLiveInRegions)
    ImpactedRegions.insert(It.second.begin(), It.second.end());

  // Make copies of the register pressure and live-ins caches that will be
  // updated as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
    int SGPRUsage = NewPressure[I].getSGPRNum();

    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Bail if the occupancy drop is caused by SGPR usage rather than VGPRs.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
    }

    // First check if we have enough trivially rematerializable instructions
    // to improve occupancy. Optimistically assume all sinkable instructions
    // decrease register pressure.
    int TotalSinkableRegs = 0;
    for (const auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If even the most optimistic scenario cannot improve occupancy, do not
    // attempt to sink any instructions.
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
    SmallVector<MachineInstr *, 4> SinkedDefs;
    for (auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize the MI into its use block. Since we only rematerialize
      // instructions that have no virtual register uses, there is no need to
      // check rematerializability at the insertion point.
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*std::prev(InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
      InsertedMIToOldDef[NewMI] = Def;

      // Update region boundaries in the scheduling region we sank from, since
      // we may have sunk an instruction that was at the beginning or end of
      // its region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI=*/nullptr,
                                 /*Removing=*/true);

      // Update region boundaries in the region we sank to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // Update pressure for all regions that had this reg as a live-in, and
      // remove the reg from their live-in sets.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // The def is live-through and not used in this block.
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // The def is used and rematerialized into this block.
          GCNDownwardRPTracker RPT(*LIS);
          auto *NonDbgMI = &*skipDebugInstructionsForward(
              NewRegions[Idx].first, NewRegions[Idx].second);
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
          RPT.advance(NewRegions[Idx].second);
          NewPressure[Idx] = RPT.moveMaxPressure();
        }
      }

      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }

    // Remove the defs we just sank from all regions' lists of sinkable defs.
    for (auto &Def : SinkedDefs)
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
        RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
    Improved = true;
  }

  if (!Improved) {
    // Occupancy was not improved for all the impacted regions: remove every
    // rematerialized clone and restore the original live intervals.
    for (auto &Entry : InsertedMIToOldDef) {
      MachineInstr *MI = Entry.first;
      MachineInstr *OldMI = Entry.second;
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      OldMI->clearRegisterDeads(Reg);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
    }
    return false;
  }

  // Occupancy was improved for all regions: erase the original defs.
  for (auto &Entry : InsertedMIToOldDef) {
    MachineInstr *MI = Entry.first;
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
    OldMI->eraseFromParent();
    LIS->removeInterval(Reg);
    LIS->createAndComputeVirtRegInterval(Reg);
  }

  // Update the live-ins, register pressure, and region caches.
  for (auto Idx : ImpactedRegions) {
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  // ...
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}

// Copied from MachineLICM.
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses())
    if (MO.getReg().isVirtual())
      return false;

  return true;
}

// When removing an instruction, update the region boundaries if it was the
// region's begin or end; when inserting, grow the region to cover the new
// instruction if it lands at a boundary.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = RegionBoundaries.size();
  // Search for the first region of the block where MI is located.
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
    ++I;

  for (; I != E; ++I) {
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
      return;

    if (Removing && MI == RegionBoundaries[I].first &&
        MI == RegionBoundaries[I].second) {
      // MI is in a region of size 1. After removal the region becomes empty,
      // so set RegionBegin and RegionEnd to the past-the-end iterator.
      RegionBoundaries[I] =
          std::pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == RegionBoundaries[I].first) {
      if (Removing)
        RegionBoundaries[I] =
            std::pair(std::next(MI), RegionBoundaries[I].second);
      else
        // NewMI was inserted in front of the region; it becomes the new
        // RegionBegin.
        RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
                                        RegionBoundaries[I].second);
      return;
    }
    if (Removing && MI == RegionBoundaries[I].second) {
      RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
      return;
    }
  }
}
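// Reading note (explanatory, not from the source): at most one region can
// match, and three cases are handled — removing the sole instruction of a
// size-1 region empties it (both bounds become end()), removing or inserting
// at RegionBegin shifts the begin iterator, and removing the instruction at
// RegionEnd pulls the end iterator back by one.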

static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
    return isIGLPMutationOnly(MI->getOpcode());
  });
}

GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
    bool RemoveKillFlags)
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}

void GCNPostScheduleDAGMILive::schedule() {
  HasIGLPInstrs = hasIGLPInstrs(this);
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }

  ScheduleDAGMI::schedule();
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
  if (HasIGLPInstrs)
    SavedMutations.swap(Mutations);

  ScheduleDAGMI::finalizeSchedule();
}


static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))

static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))

static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)

static bool isIGLPMutationOnly(unsigned Opcode)

static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))

static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))

static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)

static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)

static bool canUsePressureDiffs(const SUnit &SU)

Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.

static void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)

static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))


uint32_t getLDSSize() const

unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const

Inverse of getMaxLocalMemWithWaveCount.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

void resize(unsigned N, bool t=false)

resize - Grow or shrink the bitvector.

bool none() const

none - Returns true if none of the bits are set.

bool initGCNSchedStage() override

bool shouldRevertScheduling(unsigned WavesAfter) override

bool initGCNRegion() override


bool erase(const KeyT &Val)

bool advance(MachineInstr *MI=nullptr, bool UseInternalIterator=true)

Move to the state at the next MI.

GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const

Mostly copy/paste from CodeGen/RegisterPressure.cpp Calculate the impact MI will have on CurPressure ...

GCNMaxILPSchedStrategy(const MachineSchedContext *C)

bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override

Apply a set of heuristics to a new candidate.

bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override

GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.

GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)

GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)

void finalizeSchedule() override

Allow targets to perform final scheduling actions at the level of the whole MachineFunction.

void schedule() override

Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.

GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)

GCNRegPressure getPressure() const

virtual bool initGCNRegion()

GCNRegPressure PressureBefore

bool isRegionWithExcessRP() const

bool mayCauseSpilling(unsigned WavesAfter)

ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)

GCNScheduleDAGMILive & DAG

const GCNSchedStageID StageID

std::vector< MachineInstr * > Unsched

GCNRegPressure PressureAfter

SIMachineFunctionInfo & MFI

unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)

virtual void finalizeGCNSchedStage()

virtual bool initGCNSchedStage()

virtual bool shouldRevertScheduling(unsigned WavesAfter)

std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations

GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)

MachineBasicBlock * CurrentMBB

This is a minimal scheduler strategy.

const unsigned HighRPSGPRBias

GCNDownwardRPTracker DownwardTracker

GCNSchedStrategy(const MachineSchedContext *C)

SmallVector< GCNSchedStageID, 4 > SchedStages

SUnit * pickNodeBidirectional(bool &IsTopNode)

void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool IsBottomUp)

unsigned SGPRCriticalLimit

std::vector< unsigned > MaxPressure

bool hasNextStage() const

GCNSchedStageID getCurrentStage()

SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage

unsigned VGPRCriticalLimit

void schedNode(SUnit *SU, bool IsTopNode) override

Update the scheduler's state after scheduling a node.

GCNDownwardRPTracker * getDownwardTracker()

std::vector< unsigned > Pressure

void initialize(ScheduleDAGMI *DAG) override

Initialize the strategy after building the DAG for a new region.

GCNUpwardRPTracker UpwardTracker

const unsigned HighRPVGPRBias

void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)

unsigned getTargetOccupancy()

SUnit * pickNode(bool &IsTopNode) override

Pick the best node to balance the schedule. Implements MachineSchedStrategy.

GCNUpwardRPTracker * getUpwardTracker()

GCNSchedStageID getNextStage() const

void finalizeSchedule() override

Allow targets to perform final scheduling actions at the level of the whole MachineFunction.

void schedule() override

Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.

GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)

unsigned getAddressableNumArchVGPRs() const

bool hasGFX90AInsts() const

unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const

Return occupancy for the given function.

const SIInstrInfo * getInstrInfo() const override

unsigned getMaxNumVGPRs(unsigned WavesPerEU) const

unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const

Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const

Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.

unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const

void recede(const MachineInstr &MI)

Move to the state of RP just before the MI .

void traceCandidate(const SchedCandidate &Cand)

void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)

Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...

MachineSchedPolicy RegionPolicy

const TargetSchedModel * SchedModel

const MachineSchedContext * Context

const TargetRegisterInfo * TRI

GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.

SchedCandidate BotCand

Candidate last picked from Bot boundary.

SchedCandidate TopCand

Candidate last picked from Top boundary.

virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const

Apply a set of heuristics to a new candidate.

void initialize(ScheduleDAGMI *dag) override

Initialize the strategy after building the DAG for a new region.

void schedNode(SUnit *SU, bool IsTopNode) override

Update the scheduler's state after scheduling a node.

bool shouldRevertScheduling(unsigned WavesAfter) override

bool hasInterval(Register Reg) const

SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)

void handleMove(MachineInstr &MI, bool UpdateFlags=false)

Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.

SlotIndexes * getSlotIndexes() const

SlotIndex getInstructionIndex(const MachineInstr &Instr) const

Returns the base index of the given instruction.

void RemoveMachineInstrFromMaps(MachineInstr &MI)

void removeInterval(Register Reg)

Interval removal.

LiveInterval & createAndComputeVirtRegInterval(Register Reg)

instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

succ_iterator succ_begin()

MachineInstr * remove(MachineInstr *I)

Remove the unbundled instruction from the instruction list without deleting it.

unsigned succ_size() const

MachineInstrBundleIterator< MachineInstr > iterator

StringRef getName() const

Return the name of the corresponding LLVM basic block, or an empty string.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

Representation of each machine instruction.

const MachineBasicBlock * getParent() const

bool mayLoad(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly read memory.

iterator_range< mop_iterator > operands()

void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

void clearRegisterDeads(Register Reg)

Clear all dead flags on operands defining register Reg.

MachineOperand class - Representation of each machine instruction operand.

bool hasOneNonDBGUse(Register RegNo) const

hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.

const TargetRegisterClass * getRegClass(Register Reg) const

Return the register class of the specified virtual register.

MachineOperand * getOneDef(Register Reg) const

Returns the defining operand if there is exactly one operand defining the specified register,...

use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const

bool hasOneDef(Register RegNo) const

Return true if there is exactly one operand defining the specified register.

unsigned getNumVirtRegs() const

getNumVirtRegs - Return the number of virtual registers created.

bool shouldRevertScheduling(unsigned WavesAfter) override

bool shouldRevertScheduling(unsigned WavesAfter) override

bool shouldRevertScheduling(unsigned WavesAfter) override

bool initGCNRegion() override

bool initGCNSchedStage() override

Capture a change in pressure for a single pressure set.

Helpers for implementing custom MachineSchedStrategy classes.

Track the current register pressure at some position in the instruction stream, and remember the high...

void advance()

Advance across the current instruction.

void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)

Get the pressure of each PSet after traversing this instruction top-down.

const std::vector< unsigned > & getRegSetPressureAtPos() const

Get the register set pressure at the current position, which may be less than the pressure across the...

void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)

Get the pressure of each PSet after traversing this instruction bottom-up.

GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)

unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const

getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...

List of registers defined and used by a machine instruction.

void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)

Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...

void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)

Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...

void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)

Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...

Wrapper class representing virtual and physical registers.

static Register index2VirtReg(unsigned Index)

Convert a 0-based index to a virtual register number.

const TargetSchedModel & getSchedModel() const

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

unsigned getOccupancy() const

void increaseOccupancy(const MachineFunction &MF, unsigned Limit)

unsigned getMaxWavesPerEU() const

unsigned getMinWavesPerEU() const

unsigned getMinAllowedOccupancy() const

void limitOccupancy(const MachineFunction &MF)

static unsigned getNumCoveredRegs(LaneBitmask LM)

static bool isVGPRClass(const TargetRegisterClass *RC)

Scheduling unit. This is a node in the scheduling DAG.

bool isInstr() const

Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.

unsigned NodeNum

Entry # of node in the node vector.

unsigned short Latency

Node latency.

bool isScheduled

True once scheduled.

bool isBottomReady() const

SmallVector< SDep, 4 > Preds

All sunit predecessors.

MachineInstr * getInstr() const

Returns the representative MachineInstr for this SUnit.

Each Scheduling boundary is associated with ready queues.

unsigned getLatencyStallCycles(SUnit *SU)

Get the difference between the given SUnit's ready time and the current cycle.

SUnit * pickOnlyChoice()

Call this before applying any other heuristics to the Available queue.

unsigned getCurrMOps() const

Micro-ops issued in the current cycle.

void removeReady(SUnit *SU)

Remove SU from the ready set for this boundary.

A ScheduleDAG for scheduling lists of MachineInstr.

MachineBasicBlock::iterator end() const

Returns an iterator to the bottom of the current scheduling region.

MachineBasicBlock * BB

The block in which to insert instructions.

MachineBasicBlock::iterator RegionEnd

The end of the range to be scheduled.

virtual void finalizeSchedule()

Allow targets to perform final scheduling actions at the level of the whole MachineFunction.

MachineBasicBlock::iterator begin() const

Returns an iterator to the top of the current scheduling region.

SUnit * getSUnit(MachineInstr *MI) const

Returns an existing SUnit for this MI, or nullptr.

virtual void exitRegion()

Called when the scheduler has finished scheduling the current region.

MachineBasicBlock::iterator RegionBegin

The beginning of the range to be scheduled.

unsigned NumRegionInstrs

Instructions in this region (distance(RegionBegin, RegionEnd)).

ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...

void schedule() override

Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.

PressureDiff & getPressureDiff(const SUnit *SU)

void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override

Implement the ScheduleDAGInstrs interface for handling the next scheduling region.

bool ShouldTrackLaneMasks

const RegPressureTracker & getBotRPTracker() const

bool isTrackingPressure() const

Return true if register pressure tracking is enabled.

const RegPressureTracker & getTopRPTracker() const

RegPressureTracker RPTracker

ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...

void startBlock(MachineBasicBlock *bb) override

Prepares to perform scheduling in the given block.

void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)

Add a postprocessing step to the DAG builder.

MachineBasicBlock::iterator top() const

void schedule() override

Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.

MachineBasicBlock::iterator bottom() const

void finishBlock() override

Cleans up after scheduling in the given block.

const SUnit * getNextClusterPred() const

void placeDebugValues()

Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.

const SUnit * getNextClusterSucc() const

std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations

Ordered list of DAG postprocessing steps.

MachineRegisterInfo & MRI

Virtual/real register map.

const TargetInstrInfo * TII

Target instruction information.

std::vector< SUnit > SUnits

The scheduling units.

const TargetRegisterInfo * TRI

Target processor register info.

MachineFunction & MF

Machine function.

static const unsigned ScaleFactor

unsigned getMetric() const

SlotIndex - An opaque wrapper around machine indexes.

SlotIndex getRegSlot(bool EC=false) const

Returns the register use/def slot in the current instruction for a normal or early-clobber def.

SlotIndex getMBBStartIdx(unsigned Num) const

Returns the first index in the given basic block number.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

TargetInstrInfo - Interface to description of machine instruction set.

bool isTriviallyReMaterializable(const MachineInstr &MI) const

Return true if the instruction is trivially rematerializable, meaning it has no side effects and requ...

Provide an instruction scheduling machine model to CodeGen passes.

virtual const TargetInstrInfo * getInstrInfo() const

bool initGCNSchedStage() override

bool initGCNRegion() override

void finalizeGCNSchedStage() override

bool shouldRevertScheduling(unsigned WavesAfter) override

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)


initializer< Ty > init(const Ty &Val)


bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)

unsigned getWeakLeft(const SUnit *SU, bool isTop)

GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)

std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)

Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.

constexpr T alignDown(U Value, V Align, W Skew=0)

Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.

cl::opt< bool > VerifyScheduling

Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)

IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)

Increment It until it points to a non-debug instruction or to End and return the resulting iterator.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)

@ UnclusteredHighRPReschedule

@ MemoryClauseInitialSchedule

@ ClusteredLowOccupancyReschedule

auto reverse(ContainerTy &&C)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)

IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)

Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...

raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)

creates a map MachineInstr -> LiveRegSet R - range of iterators on instructions After - upon entry or...

GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)

bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)

Return true if this heuristic determines order.

Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.

int biasPhysReg(const SUnit *SU, bool isTop)

Minimize physical register live ranges.

Implement std::hash so that hash_code can be used in STL containers.

bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const

unsigned getOccupancy(const GCNSubtarget &ST) const

unsigned getVGPRNum(bool UnifiedVGPRFile) const

unsigned getArchVGPRNum() const

unsigned getAGPRNum() const

unsigned getSGPRNum() const

bool less(const MachineFunction &MF, const GCNRegPressure &O, unsigned MaxOccupancy=std::numeric_limits< unsigned >::max()) const

Compares this GCNRegpressure to O, returning true if this is less.

Policy for scheduling the next instruction in the candidate's zone.

Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...

void setBest(SchedCandidate &Best)

void reset(const CandPolicy &NewPolicy)

void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)

SchedResourceDelta ResDelta

Status of an instruction's critical resource consumption.

unsigned DemandedResources

static constexpr LaneBitmask getNone()

MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...

RegisterClassInfo * RegClassInfo

bool DisableLatencyHeuristic

PressureChange CriticalMax

PressureChange CurrentMax

bool IsAcyclicLatencyLimited