LLVM: lib/Target/AMDGPU/GCNSchedStrategy.cpp Source File

//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
// ... (remaining header comment and #includes lost in extraction)

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool> RelaxedOcc(
    "amdgpu-schedule-relaxed-occupancy", cl::Hidden,
    cl::desc("Relax occupancy targets for kernels which are memory "
             "bound (amdgpu-membound-threshold), or "
             "Wave Limited (amdgpu-limit-wave-threshold)."),
    cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));

static cl::opt<unsigned> PendingQueueLimit(
    "amdgpu-scheduler-pending-queue-limit", cl::Hidden,
    cl::desc(
        "Max (Available+Pending) size to inspect pending queue (0 disables)"),
    cl::init(256));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
    "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure before scheduling."),
    cl::init(false));

static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
    "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure after scheduling."),
    cl::init(false));
#endif

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C) {} // (member-initializer list lost in extraction)

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);
  // ... (subtarget and function-info setup lost in extraction)

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  // ... (occupancy-based critical-limit computation lost in extraction)
  if (/* region is not known to spill; condition lost in extraction */) {
    // ...
  } else {
    // Regions that are known to spill get their VGPR budget from the
    // allocation granule and the addressable VGPR count instead of from the
    // occupancy-derived limits.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");
    unsigned Granule =
        /* AMDGPU::IsaInfo::getVGPRAllocGranule(...); args lost */;
    unsigned Addressable =
        /* AMDGPU::IsaInfo::getAddressableNumVGPRs(...); args lost */;
    unsigned VGPRBudget =
        /* alignDown(Addressable / target occupancy, Granule) */;
    VGPRBudget = std::max(VGPRBudget, Granule);
    // ...
  }

  // ... (final SGPRCriticalLimit/VGPRCriticalLimit assignments lost)
}
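A worked example of the spill-path budget above, with assumed numbers only: if 256 VGPRs are addressable, the target occupancy is 4 waves, and the allocation granule is 4, the budget is alignDown(256 / 4, 4) = 64, and std::max(64, 4) leaves it at 64. As a checkable snippet (assumed inputs; the real values come from the subtarget queries above):

#include "llvm/Support/MathExtras.h"
#include <algorithm>

// Assumed inputs for illustration.
unsigned Addressable = 256, TargetOccupancy = 4, Granule = 4;
unsigned VGPRBudget = llvm::alignDown(Addressable / TargetOccupancy, Granule);
VGPRBudget = std::max(VGPRBudget, Granule); // == 64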

/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
///
/// Pressure diffs are imprecise for subregister definitions and for operands
/// on physical registers, so fall back to querying the RegPressureTracker in
/// those cases.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  // Cannot use pressure diffs for subregister defs or with physregs, it's
  // imprecise in both cases.
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }
  return true;
}

static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass them a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
  if (!GCNTrackers) {
    AtTop
        ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
        : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
    return;
  }

  // AMDGPU-specific trackers.
  Pressure.resize(4, 0);
  MachineInstr *MI = SU->getInstr();
  GCNRegPressure NewPressure;
  if (AtTop) {
    GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
    NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);
  } else {
    GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
    TempUpwardTracker.recede(*MI);
    NewPressure = TempUpwardTracker.getPressure();
  }
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
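Because both GCN trackers are copyable, the copy-probe pattern above can measure one instruction's effect on pressure without disturbing the live tracker. A short usage sketch, assuming a live UpwardTracker and an SUnit *SU as in the surrounding code:

// Copy-probe: recede over one instruction on a throwaway copy.
GCNUpwardRPTracker Probe(UpwardTracker);
Probe.recede(*SU->getInstr());
unsigned SGPRs = Probe.getPressure().getSGPRNum(); // pressure if SU goes next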

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  Pressure.clear();
  MaxPressure.clear();

  // Prefer the cached PressureDiffs over querying the RegPressureTracker
  // whenever possible: the tracker issues many expensive LiveIntervals
  // queries, while a PressureDiff lookup is nearly free. Fall back to the
  // tracker when the diffs cannot be trusted (see canUsePressureDiffs).
  if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
  } else {
    // Reserve 4 slots.
    Pressure.resize(4, 0);
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up, so invert its sign when working
      // top-down.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure");
    }
#endif
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets by
  // the same amount, the generic scheduler prefers the set with fewer
  // registers (SGPRs here), which is rarely what we want. So report
  // excess/critical pressure for only one of VGPRs or SGPRs at a time.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Pressure is 'critical' when it approaches a value that would reduce wave
  // occupancy. Once critical, increasing SGPR and VGPR pressure costs the
  // same, so neither is preferred over the other.
  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
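A worked example of the deltas above, with assumed limits: if VGPRExcessLimit is 24 and scheduling the candidate raises VGPR pressure to 30, RPDelta.Excess becomes a VGPR_32 PressureChange with unit increment 30 - 24 = 6; tryPressure() later uses that increment to penalize this candidate against alternatives with a smaller (or no) excess.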

static bool shouldCheckPending(SchedBoundary &Zone,
                               const TargetSchedModel *SchedModel) {
  bool HasBufferedModel =
      SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
  // Only inspect the pending queue when the combined queues are small enough
  // (see PendingQueueLimit above) and the machine model buffers micro-ops.
  unsigned Size = Zone.Available.size() + Zone.Pending.size();
  return PendingQueueLimit && Size <= PendingQueueLimit && HasBufferedModel;
}

static SUnit *pickOnlyChoice(SchedBoundary &Zone,
                             const TargetSchedModel *SchedModel) {
  // pickOnlyChoice() releases pending instructions and checks for new hazards.
  if (SUnit *OnlyChoice = Zone.pickOnlyChoice())
    if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())
      return OnlyChoice;

  return nullptr;
}

void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,
                                              const SchedCandidate &Preferred) {
  LLVM_DEBUG({
    dbgs() << "Prefer:\t\t";
    DAG->dumpNode(*Preferred.SU);

    if (Current.SU) {
      dbgs() << "Not:\t";
      DAG->dumpNode(*Current.SU);
    }

    dbgs() << "Reason:\t\t";
    traceCandidate(Preferred);
  });
}

/// Pick the best candidate from \p Zone's Available queue, then (when
/// allowed) from its Pending queue, recording in \p IsPending whether the
/// winner came from the pending queue.
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand, bool &IsPending,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  IsPending = false;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }
  ReadyQueue &AQ = Zone.Available;
  for (SUnit *SU : AQ) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ... (comparison via tryCandidate lost in extraction)
    if (/* TryCand beats Cand */) {
      Cand.setBest(TryCand);
    } else {
      printCandidateDecision(TryCand, Cand);
    }
  }

  if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())
    return;

  ReadyQueue &PQ = Zone.Pending;
  for (SUnit *SU : PQ) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ... (comparison via tryPendingCandidate lost in extraction)
    if (/* TryCand beats Cand */) {
      IsPending = true;
      Cand.setBest(TryCand);
    } else {
      printCandidateDecision(TryCand, Cand);
    }
  }
}

SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
                                               bool &PickedPending) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) {
    IsTopNode = true;
    return SU;
  }

  // Set policies from the state of each zone and the instructions outside it.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  bool BotPending = false;
  // See if BotCand is still valid (because we previously scheduled from Top).
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
                      BotPending,
                      /*IsBottomUp=*/true);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
                        BotPending,
                        /*IsBottomUp=*/true);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  bool TopPending = false;
  // Check if the top queue has a better candidate.
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
                      TopPending,
                      /*IsBottomUp=*/false);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
                        TopPending,
                        /*IsBottomUp=*/false);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick the best from BotCand and TopCand.
  SchedCandidate Cand = BotCand;
  PickedPending = BotPending && TopPending;

  SchedCandidate TryCand = TopCand;
  if (BotPending || TopPending) {
    // ... (pending-aware comparison lost in extraction)
  } else {
    // ... (tryCandidate-based comparison lost in extraction)
  }
  if (/* TryCand preferred; condition lost in extraction */)
    Cand.setBest(TryCand);

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  bool PickedPending;
  SUnit *SU;
  do {
    PickedPending = false;
    if (RegionPolicy.OnlyTopDown) {
      SU = pickOnlyChoice(Top, SchedModel);
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          PickedPending,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = pickOnlyChoice(Bot, SchedModel);
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
                          PickedPending,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode, PickedPending);
    }
  } while (SU->isScheduled);

  if (PickedPending) {
    unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;
    SchedBoundary &Zone = IsTopNode ? Top : Bot;
    unsigned CurrentCycle = Zone.getCurrCycle();
    if (ReadyCycle > CurrentCycle)
      Zone.bumpCycle(ReadyCycle);

    // FIXME: checkHazard() doesn't say in which cycle the hazard resolves, so
    // keep bumping the cycle by one until it clears.
    while (Zone.checkHazard(SU))
      Zone.bumpCycle(Zone.getCurrCycle() + 1);

    Zone.releasePending();
  }

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (GCNTrackers) {
    // Keep the AMDGPU RP trackers in sync with the node just scheduled.
    MachineInstr *MI = SU->getInstr();
    IsTopNode ? DownwardTracker.advance(MI) : UpwardTracker.recede(*MI);
  }

  return GenericScheduler::schedNode(SU, IsTopNode);
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

/// Evaluates instructions in the pending queue using a subset of scheduling
/// heuristics. Pending instructions cannot be issued yet, so only the
/// heuristics that remain meaningful for them are applied.
bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand,
                                           SchedCandidate &TryCand,
                                           SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // ... (physreg-bias heuristic lost in extraction)

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // ... (boundary-local heuristics lost in extraction)
  }

  return false;
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C, bool IsLegacyScheduler)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
  GCNTrackers = GCNTrackers & !IsLegacyScheduler;
}

bool GCNMaxOccupancySchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                SchedCandidate &TryCand,
                                                SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // ... (stall and resource-balance heuristics lost in extraction)

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // ... (weak-edge heuristic lost in extraction)
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // ... (one more pressure-based heuristic lost in extraction)
  if (DAG->isTrackingPressure() &&
      /* second pressure comparison lost in extraction */ false)
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}

GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::MemoryClauseInitialSchedule);
}

/// GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as
/// much as possible, by biasing long-latency (load-like) candidates toward
/// each other and by keeping clustered nodes together so they can form memory
/// clauses.
bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // ... (physreg-bias heuristic lost in extraction)

  if (DAG->isTrackingPressure()) {
    // Avoid exceeding the target's limit.
    if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                    RegExcess, TRI, DAG->MF))
      return TryCand.Reason != NoCand;

    // Avoid increasing the max critical pressure in the scheduled region.
    if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                    TryCand, Cand, RegCritical, TRI, DAG->MF))
      return TryCand.Reason != NoCand;
  }

  // MaxMemoryClause-specific: prefer candidates that continue the current
  // cluster so memory operations stay adjacent.
  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic-path limited, aggressively schedule for
    // latency.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // MaxMemoryClause-specific: among may-load candidates, keep the
    // zone-appropriate long-latency one first so loads can be claused.
    bool TryMayLoad =
        TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
    bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();

    if (TryMayLoad || CandMayLoad) {
      bool TryLongLatency =
          TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
      bool CandLongLatency =
          10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;

      if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                     Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                     Cand, Stall))
        return TryCand.Reason != NoCand;
    }
    // ... (stall-cycle heuristic lost in extraction)
  }

  if (SameBoundary) {
    // ... (weak-edge heuristic lost in extraction)
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // ... (resource-balance heuristics lost in extraction)

    // Avoid serializing long-latency dependence chains. For acyclic-path
    // limited loops, latency was already checked above.
    if (/* latency policy guard lost in extraction */ !
            Rem.IsAcyclicLatencyLimited &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    if (Zone->isTop() == (TryCand.SU->NodeNum < Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }

  return false;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {

  // We want regions with a single MI to be scheduled as well so that we can
  // reason about them correctly during the scheduling stages.
  ScheduleSingleMIRegions = true;
  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule().
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
                    &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                                        MachineBasicBlock::iterator RegionEnd) {
  assert(RegionBegin != RegionEnd && "Region must not be empty");
  return &*skipDebugInstructionsBackward(std::prev(RegionEnd), RegionBegin);
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has a single successor then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be scheduled right after the current block.
  // Due to a LiveInterval analysis quirk, two predecessors of the same
  // successor block may record different lane bitmasks for a live-out
  // register, so stick to a one-to-one predecessor/successor relationship.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    // ... (tracker advance over I lost in extraction)
    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      // ... (max-pressure reset lost in extraction)
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
      auto &Rgn = Regions[CurRegion];
      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    }
    // ... (loop advance lost in extraction)
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      // Advance the tracker to the end of the block.
      // ... (exact advance calls lost in extraction)
    }
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionFirstMIs.push_back(
        &*skipDebugInstructionsForward(RegionBegin, RegionEnd));

  return getLiveRegMap(RegionFirstMIs, false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions)) {
    if (RegionBegin == RegionEnd) // Skip empty regions.
      continue;
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
  }
  return getLiveRegMap(RegionLastMIs, true, *LIS);
}

void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    auto &[RegionBegin, RegionEnd] = DAG->Regions[I];
    // Skip empty regions.
    if (RegionBegin == RegionEnd)
      continue;
    MachineInstr *RegionKey =
        IsLiveOut ? getLastMIForRegion(RegionBegin, RegionEnd) : &*RegionBegin;
    IdxToInstruction[I] = RegionKey;
  }
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start the actual scheduling here. This function is called by the base
  // class scheduler once all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  if (PrintMaxRPRegUsageBeforeScheduler) {
    // (RegKind enumerators assumed; argument order per dumpMaxRegPressure.)
    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
  }
#endif

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  if (PrintMaxRPRegUsageAfterScheduler) {
    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
    LIS->dump();
  }
#endif
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure: temporarily raise the
  // occupancy target for this stage when the wave limit allows it.
  TempTargetOccupancy = MFI.getMaxWavesPerEU() > DAG.MinOccupancy
                            ? InitialOccupancy + 1
                            : InitialOccupancy;
  IsAnyRegionScheduled = false;
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << TempTargetOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. Scheduling in favor of occupancy has already been tried.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

/// Allows to easily filter for this stage's debug output.
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)

bool PreRARematStage::initGCNSchedStage() {
  // ... (precondition comments lost in extraction)
  assert(S.hasNextStage());

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Map each MI to the region it belongs to, so rematerialization candidates
  // can be attributed back to their regions.
  const unsigned NumRegions = DAG.Regions.size();
  RegionBB.reserve(NumRegions);
  for (unsigned I = 0; I < NumRegions; ++I) {
    RegionBoundaries Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI)
      MIRegion.insert({&*MI, I});
    RegionBB.push_back(Region.first->getParent());
  }

  if (!canIncreaseOccupancyOrReduceSpill())
    return false;

  // Rematerialize identified instructions and update the scheduler's state.
  rematerialize();
  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();
  REMAT_DEBUG({
    dbgs() << "Retrying function scheduling with new min. occupancy of "
           << AchievedOcc << " from rematerializing (original was "
           << DAG.MinOccupancy;
    if (TargetOcc)
      dbgs() << ", target was " << *TargetOcc;
    dbgs() << ")\n";
  });

  if (AchievedOcc > DAG.MinOccupancy) {
    DAG.MinOccupancy = AchievedOcc;
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    assert(IsAnyRegionScheduled);
    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  } else if (!IsAnyRegionScheduled) {
    assert(DAG.MinOccupancy == InitialOccupancy);
    LLVM_DEBUG(dbgs() << StageID
                      << ": No regions scheduled, min occupancy stays at "
                      << DAG.MinOccupancy << ", MFI occupancy stays at "
                      << MFI.getOccupancy() << ".\n");
  }

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  // ... (region entry call lost in extraction)

  // Skip empty scheduling regions and regions with a single instruction.
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*DAG.BB)
                    << "\n From: " << *DAG.begin() << " To: ";
             if (DAG.RegionEnd != DAG.BB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save the original order of instructions before scheduling so the schedule
  // can be reverted later if needed.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG.TII);
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (SII->isIGLPMutationOnly(I.getOpcode()))
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;

  if (DAG.RegionsWithIGLPInstrs[RegionIdx]) {
    // ... (IGLP mutation swap for this region lost in extraction)
  }

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions whose pressure is currently known to limit
  // occupancy and that may benefit from a pressure-reducing reschedule.
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();

  // Use the temporary target until the first region is actually rescheduled.
  unsigned CurrentTargetOccupancy =
      IsAnyRegionScheduled ? DAG.MinOccupancy : TempTargetOccupancy;
  if (/* region not flagged for excess RP; condition lost in extraction */
      (CurrentTargetOccupancy <= InitialOccupancy ||
       DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
           InitialOccupancy))
    return false;

  bool IsSchedulingThisRegion = GCNSchedStage::initGCNRegion();

  // On the first region we actually reschedule, commit the raised target.
  if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
    IsAnyRegionScheduled = true;
    if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy) {
      DAG.MinOccupancy = TempTargetOccupancy;
      MFI.increaseOccupancy(MF, TempTargetOccupancy);
    }
  }
  return IsSchedulingThisRegion;
}

bool ClusteredLowOccStage::initGCNRegion() {
  // Only reschedule regions that recorded high register pressure; others
  // already have a satisfactory schedule.
  if (/* region not flagged for high RP; condition lost in extraction */)
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get the real RP of the block if it hasn't been calculated before. After
  // the initial schedule stage, real RP is collected after scheduling.
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::MemoryClauseInitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  DAG.exitRegion();
  advanceRegion();
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);

  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;

    // Early out if the pressure is within the desired limits.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
  unsigned WavesAfter = std::min(
      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
  unsigned WavesBefore = std::min(
      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region; scheduling may still be reverted below if the
  // occupancy before was higher or the new pressure exceeds the limits.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory-bound functions to drop occupancy when not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  // The maximum number of VGPRs (arch VGPRs on a non-unified register file,
  // or VGPR + AGPR on a unified one).
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  // The maximum number of arch VGPRs for both unified and non-unified register
  // files.
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);

  if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
      PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RegionsWithHighRP[RegionIdx] = true;
    DAG.RegionsWithExcessRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling; otherwise commit the new pressure.
  if (shouldRevertScheduling(WavesAfter))
    revertScheduling();
  else
    DAG.Pressure[RegionIdx] = PressureAfter;
}

unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}
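A worked example of the recurrence above, with assumed latencies: if SU has two register-dependence predecessors whose defs became ready at cycles 2 and 5, with latencies 4 and 1 respectively, then ReadyCycle = max(CurrCycle, 2 + 4, 5 + 1) = max(CurrCycle, 6); with CurrCycle = 3 the node becomes ready at cycle 6, and the 6 - 3 = 3 cycle gap is counted as a bubble by the callers below.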

#ifndef NDEBUG
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction "
            " "
            " \n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
#endif

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}
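The metric scales stall cycles by ScheduleMetrics::ScaleFactor relative to schedule length. A worked example, assuming ScaleFactor = 100: with SumBubbles = 25 stall cycles across CurrCycle = 200 issue cycles, the metric is 25 * 100 / 200 = 12; a schedule with no bubbles reports the floor value 1.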

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  // For dynamic VGPR mode, we don't want to waste any VGPR blocks.
  if (DAG.MFI.isDynamicVGPREnabled()) {
    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        /* VGPR count from PressureBefore; exact expression lost */);
    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        /* VGPR count from PressureAfter; exact expression lost */);
    if (BlocksAfter > BlocksBefore)
      return true;
  }

  return false;
}

bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if ((WavesAfter <=
           PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) &&
       mayCauseSpilling(WavesAfter)) ||
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  // With a bias of 100 we chase occupancy only, so skip the metric check.
  if (ScheduleMetricBias == ScheduleMetrics::ScaleFactor)
    return false;

  LLVM_DEBUG(
      dbgs()
      << "\n\t *** In shouldRevertScheduling ***\n"
      << " *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs()
      << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore = std::min(
      S.getTargetOccupancy(),
      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor / WavesBefore) *
       ((OldMetric + ScheduleMetricBias * ScheduleMetrics::ScaleFactor) /
        NewMetric)) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}
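A worked example of the Profit formula, assuming ScheduleMetrics::ScaleFactor = 100 and the flag default ScheduleMetricBias = 10: with WavesBefore = 4, WavesAfter = 5, OldMetric = 20, and NewMetric = 30, Profit = ((5 * 100 / 4) * ((20 + 10 * 100) / 30)) / 100 = (125 * 34) / 100 = 42. Since 42 < 100, the unclustered schedule is judged unprofitable and reverted; the occupancy gain (5 vs 4 waves) did not outweigh the latency-metric regression (30 vs 20).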

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  return GCNSchedStage::shouldRevertScheduling(WavesAfter) ||
         mayCauseSpilling(WavesAfter) || (TargetOcc && WavesAfter < TargetOcc);
}

bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(
    unsigned WavesAfter) {
  return mayCauseSpilling(WavesAfter);
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (/* pressure exceeds limits at WavesAfter; condition lost in extraction */) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->splice(DAG.RegionEnd, DAG.BB, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instructions are at the end of the
  // block and RegionEnd points at the first of them; advance RegionEnd past
  // them to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() is a debug instruction, the region would shrink since
  // all debug instructions were moved to the end of the block. Find the first
  // instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug values back into their correct place.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}

bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
  // Maps optimizable regions (i.e., regions at minimum and register-limited
  // occupancy, or regions with spilling) to the target RP we would like to
  // reach there.
  DenseMap<unsigned, GCNRPTarget> OptRegions;
  unsigned MaxSGPRs = /* limit at current occupancy; lost in extraction */;
  unsigned MaxVGPRs = /* limit at current occupancy; lost in extraction */;
  auto ResetTargetRegions = [&]() {
    OptRegions.clear();
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      GCNRPTarget Target(/* MaxSGPRs, MaxVGPRs, pressure; args lost */);
      if (Target.satisfied())
        continue;
      OptRegions.insert({I, Target});
    }
  };

  ResetTargetRegions();
  if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
    // In addition to register usage being above addressable limits, occupancy
    // below the minimum is considered like "spilling" as well.
    TargetOcc = std::nullopt;
  } else {
    // There is no spilling and room to improve occupancy; set up "increased
    // occupancy targets" for all regions.
    TargetOcc = DAG.MinOccupancy + 1;
    unsigned VGPRBlockSize =
        MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
    ResetTargetRegions();
  }
  REMAT_DEBUG({
    dbgs() << "Analyzing ";
    MF.getFunction().printAsOperand(dbgs(), false);
    dbgs() << ": ";
    if (OptRegions.empty()) {
      dbgs() << "no objective to achieve, occupancy is maximal at "
             << MFI.getMaxWavesPerEU();
    } else if (!TargetOcc) {
      dbgs() << "reduce spilling (minimum target occupancy is "
             << MFI.getMinWavesPerEU() << ')';
    } else {
      dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
             << TargetOcc;
    }
    dbgs() << '\n';
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
        dbgs() << REMAT_PREFIX << " [" << I << "] " << OptIt->getSecond()
               << '\n';
      }
    }
  });
  if (OptRegions.empty())
    return false;

  // Accounts for a reduction in RP in an optimizable region. Returns whether
  // we estimate that we have identified enough rematerialization opportunities
  // to achieve our goal, and sets Progress to true when this particular
  // reduction in pressure was helpful toward that goal.
  auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
                              bool &Progress) -> bool {
    GCNRPTarget &Target = OptIt->getSecond();
    if (!Target.isSaveBeneficial(Reg))
      return false;
    Progress = true;
    Target.saveReg(Reg, Mask, DAG.MRI);
    if (Target.satisfied())
      OptRegions.erase(OptIt->getFirst());
    return OptRegions.empty();
  };

  // We need up-to-date live-out info to query live-out register masks in
  // regions containing rematerializable instructions.
  DAG.RegionLiveOuts.buildLiveRegMap();

  // Cache set of registers that are going to be rematerialized.
  DenseSet<unsigned> RematRegs;

  // Identify rematerializable instructions in the function.
  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
    auto Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI) {
      // The instruction must be rematerializable.
      MachineInstr &DefMI = *MI;
      if (!isReMaterializable(DefMI))
        continue;

      // We only support rematerializing virtual registers with one definition.
      Register Reg = DefMI.getOperand(0).getReg();
      if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
        continue;

      // We only care to rematerialize the instruction if it has a single
      // non-debug user in a different region.
      MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
      if (!UseMI)
        continue;
      auto UseRegion = MIRegion.find(UseMI);
      if (UseRegion != MIRegion.end() && UseRegion->second == I)
        continue;

      // Do not rematerialize an instruction if it uses or is used by an
      // instruction that we have designated for rematerialization.
      // FIXME: Allow for rematerialization chains.
      if (Rematerializations.contains(UseMI) ||
          llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
            return MO.isReg() && RematRegs.contains(MO.getReg());
          }))
        continue;

      // Do not rematerialize an instruction if it uses registers that aren't
      // available at its use. This ensures that we are not extending any live
      // range while rematerializing.
      SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
      if (!allUsesAvailableAt(&DefMI, UseIdx, *DAG.LIS, DAG.MRI,
                              *DAG.TII))
        continue;

      RematInstruction &Remat =
          Rematerializations.try_emplace(&DefMI, UseMI).first->second;

      bool RematUseful = false;
      if (auto It = OptRegions.find(I); It != OptRegions.end()) {
        // Optimistically consider that moving the instruction out of its
        // defining region will reduce RP in the latter; this assumes that
        // maximum RP in the region is reached somewhere between the defining
        // instruction and the end of the region.
        REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
        LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
        if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
          return true;
      }

      for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
        // We are only collecting regions in which the register is a live-in
        // (and may be live-through).
        auto It = DAG.LiveIns[LIRegion].find(Reg);
        if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
          continue;
        Remat.LiveInRegions.insert(LIRegion);

        // Account for the RP reduction in optimizable regions in which the
        // defined register is a live-in. This is exact for live-through
        // regions but optimistic in the using region, where RP is only
        // actually reduced if maximum RP is reached between the region start
        // and the rematerializable instruction's use.
        if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
          REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
          if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
                               RematUseful))
            return true;
        }
      }

      // If the instruction is not a live-in or live-out in any optimizable
      // region then there is no point in rematerializing it.
      if (!RematUseful) {
        Rematerializations.pop_back();
        REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
      } else {
        RematRegs.insert(Reg);
      }
    }
  }

  if (TargetOcc) {
    // We were trying to increase occupancy but failed; abort the stage.
    REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
    Rematerializations.clear();
    return false;
  }
  REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
  return !Rematerializations.empty();
}

void PreRARematStage::rematerialize() {
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Collect regions whose RP changes in an unpredictable way; their RP must
  // be fully recomputed after all rematerializations.
  DenseSet<unsigned> RecomputeRP;

  // Rematerialize all instructions.
  for (auto &[DefMI, Remat] : Rematerializations) {
    MachineBasicBlock::iterator InsertPos(Remat.UseMI);
    Register Reg = DefMI->getOperand(0).getReg();
    unsigned DefRegion = MIRegion.at(DefMI);

    // Rematerialize DefMI to its single use block.
    TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                       AMDGPU::NoSubRegister, *DefMI);
    Remat.RematMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);

    // Update region boundaries in the region we sank from (remove the
    // defining MI) and the one we sank to (insert the rematerialized MI).
    // Only then can the original MI be erased.
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
                                 Remat.RematMI);
    }
    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
    DefMI->eraseFromParent();

    // Update live-in/RP information in all regions impacted by the
    // rematerialization.
    for (unsigned I : Remat.LiveInRegions) {
      ImpactedRegions.insert({I, DAG.Pressure[I]});
      GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];

#ifdef EXPENSIVE_CHECKS
      // All uses are known to be available/live at the remat point, so they
      // should already be live-in to the region.
      for (MachineOperand &MO : DefMI->operands()) {
        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
          continue;

        Register UseReg = MO.getReg();
        if (!UseReg.isVirtual())
          continue;

        LiveInterval &LI = DAG.LIS->getInterval(UseReg);
        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
        if (LI.hasSubRanges() && MO.getSubReg())
          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());

        LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
        // If this register has lanes not covered by the live-ins, be sure
        // they do not map to any subrange.
        if (UncoveredLanes.any()) {
          assert(LI.hasSubRanges());
          for (LiveInterval::SubRange &SR : LI.subranges())
            assert((SR.LaneMask & UncoveredLanes).none());
        }
      }
#endif

      // The register is no longer a live-in in all regions but the one
      // containing the single use. In live-through regions maximum pressure
      // decreases predictably, so update it directly; in the using region it
      // may or may not decrease, so mark it for recomputation.
      LaneBitmask PrevMask = RegionLiveIns[Reg];
      RegionLiveIns.erase(Reg);
      RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
      if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
        DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
      else
        RecomputeRP.insert(I);
    }

    // RP in the region from which the instruction was rematerialized may or
    // may not decrease.
    ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
    RecomputeRP.insert(DefRegion);

    // Recompute the live interval to reflect the rematerialization.
    Register RematReg = Remat.RematMI->getOperand(0).getReg();
    DAG.LIS->removeInterval(RematReg);
    DAG.LIS->createAndComputeVirtRegInterval(RematReg);
  }

  // All regions impacted by at least one rematerialization must be
  // rescheduled; maximum pressure must be recomputed wherever it changed
  // unpredictably.
  unsigned DynamicVGPRBlockSize =
      MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
  AchievedOcc = MFI.getMaxWavesPerEU();
  for (auto &[I, OriginalRP] : ImpactedRegions) {
    bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
    RescheduleRegions[I] = !IsEmptyRegion;
    if (!RecomputeRP.contains(I))
      continue;

    GCNRegPressure RP;
    if (IsEmptyRegion) {
      RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
    } else {
      GCNDownwardRPTracker RPT(*DAG.LIS);
      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
                                                      DAG.Regions[I].second);
      if (NonDbgMI == DAG.Regions[I].second) {
        // Region has only debug instructions; RP is the live-in pressure.
        RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
      } else {
        RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
        RPT.advance(DAG.Regions[I].second);
        RP = RPT.moveMaxPressure();
      }
    }
    DAG.Pressure[I] = RP;
    AchievedOcc =
        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
  }
  REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");
}

bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses()) {
    if (MO.getReg().isVirtual())
      continue;

    // We can't rematerialize physreg uses unless the register is a constant
    // or the use is ignorable (e.g. the implicit exec use on VALU
    // instructions).
    if (MO.getReg().isPhysical()) {
      if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
        continue;
      return false;
    }
  }

  return true;
}

void PreRARematStage::finalizeGCNSchedStage() {
  // Reducing spilling is considered always beneficial, so rematerializations
  // are never rolled back in that mode. It is also possible that rescheduling
  // lowers occupancy below what rematerialization alone achieved, in which
  // case we do not want to roll back either. Only roll back when we targeted
  // a higher occupancy and failed to reach it.
  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
  if (!TargetOcc || MaxOcc >= *TargetOcc)
    return;

  REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Roll back the rematerializations.
  for (const auto &[DefMI, Remat] : Rematerializations) {
    MachineInstr &RematMI = *Remat.RematMI;
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
    MachineBasicBlock *MBB = RegionBB[DefRegion];
    Register Reg = RematMI.getOperand(0).getReg();

    // Re-rematerialize the MI at the end of its original region. It may not
    // land in exactly its original position, but that should not matter much.
    TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI);
    MachineInstr *NewMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*NewMI);

    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
                                 nullptr);
    }
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);

    // Erase the rematerialized MI.
    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
    RematMI.eraseFromParent();

    // Recompute the live interval for the restored register.
    DAG.LIS->removeInterval(Reg);
    DAG.LIS->createAndComputeVirtRegInterval(Reg);

    // Re-add the register as a live-in in all regions it used to be one in.
    for (unsigned LIRegion : Remat.LiveInRegions)
      DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
  }

  // Reset RP in all impacted regions.
  for (auto &[I, OriginalRP] : ImpactedRegions)
    DAG.Pressure[I] = OriginalRP;

  GCNSchedStage::finalizeGCNSchedStage();
}

void GCNScheduleDAGMILive::updateRegionBoundaries(
    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
    MachineInstr *NewMI) {
  assert((!NewMI || NewMI != RegionBounds.second) &&
         "cannot remove at region end");

  if (RegionBounds.first == RegionBounds.second) {
    assert(NewMI && "cannot remove from an empty region");
    RegionBounds.first = NewMI;
    return;
  }

  // We only care about modifications at the beginning of a non-empty region,
  // since the upper region boundary is exclusive.
  if (MI != RegionBounds.first)
    return;
  if (!NewMI)
    RegionBounds.first = std::next(MI); // Removal.
  else
    RegionBounds.first = NewMI; // Insertion.
}

static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
  return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
    return SII->isIGLPMutationOnly(MI->getOpcode());
  });
}

GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
    bool RemoveKillFlags)
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}

void GCNPostScheduleDAGMILive::schedule() {
  HasIGLPInstrs = hasIGLPInstrs(this);
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }

  ScheduleDAGMI::schedule();
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
  if (HasIGLPInstrs)
    SavedMutations.swap(Mutations);

  ScheduleDAGMI::finalizeSchedule();
}
