LLVM: lib/Target/AMDGPU/SIMachineScheduler.cpp Source File

//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI Machine Scheduler interface
//
//===----------------------------------------------------------------------===//

// Headers used below (reconstructed set; the upstream file may pull in a few
// additional AMDGPU-internal headers).
#include "SIMachineScheduler.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterPressure.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"


#ifndef NDEBUG

static const char *getReasonStr(SIScheduleCandReason Reason) {
  switch (Reason) {
  case NoCand:    return "NOCAND";
  case RegUsage:  return "REGUSAGE";
  case Latency:   return "LATENCY";
  case Successor: return "SUCCESSOR";
  case Depth:     return "DEPTH";
  case NodeOrder: return "ORDER";
  }
  llvm_unreachable("Undefined SIScheduleCandReason.");
}

#endif

namespace llvm::SISched {
static bool tryLess(int TryVal, int CandVal,
                    SISchedulerCandidate &TryCand,
                    SISchedulerCandidate &Cand,
                    SIScheduleCandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  Cand.setRepeat(Reason);
  return false;
}

static bool tryGreater(int TryVal, int CandVal,
                       SISchedulerCandidate &TryCand,
                       SISchedulerCandidate &Cand,
                       SIScheduleCandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  Cand.setRepeat(Reason);
  return false;
}
} // end namespace llvm::SISched
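// Note on the helpers above (informal): tryLess/tryGreater follow the generic
// MachineScheduler tie-breaking convention. They return true as soon as the
// comparison is decided either way, recording in the losing candidate the
// strongest Reason seen so far; the weaker criteria further down a
// tryCandidate* function are therefore only consulted when the stronger ones
// tie.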

void SIScheduleBlock::addUnit(SUnit *SU) {
  NodeNum2Index[SU->NodeNum] = SUnits.size();
  SUnits.push_back(SU);
}

#ifndef NDEBUG
void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {

  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  dbgs() << '\n';
}
#endif

void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
                                          SISchedCandidate &TryCand) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // If a block has many constant loads, SGPR usage can get high; above the
  // arbitrary limit of 60, prefer consuming already-loaded constants (which
  // releases SGPRs) before loading new ones.
  if (Cand.SGPRUsage > 60 &&
      SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
                       TryCand, Cand, RegUsage))
    return;

  // Schedule low latency instructions as top as possible. Order of priority:
  // . low latency instructions that do not depend on other low latency
  //   instructions we have not waited for yet,
  // . other instructions that do not depend on such low latency instructions,
  // . low latency instructions,
  // . all other instructions.
  if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
                       Cand.HasLowLatencyNonWaitedParent,
                       TryCand, Cand, SIScheduleCandReason::Depth))
    return;

  if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
                          TryCand, Cand, SIScheduleCandReason::Depth))
    return;

  if (TryCand.IsLowLatency &&
      SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
                       TryCand, Cand, SIScheduleCandReason::Depth))
    return;

  if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
                       TryCand, Cand, RegUsage))
    return;

  // Fall back to original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
    TryCand.Reason = NodeOrder;
  }
}

SUnit* SIScheduleBlock::pickNode() {
  SISchedCandidate TopCand;

  for (SUnit* SU : TopReadySUs) {
    SISchedCandidate TryCand;
    std::vector<unsigned> pressure;
    std::vector<unsigned> MaxPressure;
    // Predict register usage after this instruction.
    TryCand.SU = SU;
    TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
    TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
    TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
    TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
    TryCand.HasLowLatencyNonWaitedParent =
      HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
    tryCandidateTopDown(TopCand, TryCand);
    if (TryCand.Reason != NoCand)
      TopCand.setBest(TryCand);
  }

  return TopCand.SU;
}

// Schedule something valid.
void SIScheduleBlock::fastSchedule() {
  TopReadySUs.clear();
  if (Scheduled)
    undoSchedule();

  for (SUnit* SU : SUnits) {
    if (!SU->NumPredsLeft)
      TopReadySUs.push_back(SU);
  }

  while (!TopReadySUs.empty()) {
    SUnit *SU = TopReadySUs[0];
    ScheduledSUnits.push_back(SU);
    nodeScheduled(SU);
  }

  Scheduled = true;
}

static bool isDefBetween(unsigned Reg,
                         SlotIndex First, SlotIndex Last,
                         const MachineRegisterInfo *MRI,
                         const LiveIntervals *LIS) {
  for (MachineRegisterInfo::def_instr_iterator
       UI = MRI->def_instr_begin(Reg),
       UE = MRI->def_instr_end(); UI != UE; ++UI) {
    const MachineInstr* MI = &*UI;
    if (MI->isDebugValue())
      continue;
    SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
    if (InstSlot >= First && InstSlot <= Last)
      return true;
  }
  return false;
}

void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
                                      MachineBasicBlock::iterator EndBlock) {
  IntervalPressure Pressure, BotPressure;
  RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
  LiveIntervals *LIS = DAG->getLIS();
  MachineRegisterInfo *MRI = DAG->getMRI();
  DAG->initRPTracker(TopRPTracker);
  DAG->initRPTracker(BotRPTracker);
  DAG->initRPTracker(RPTracker);

  // Goes through all SUs. RPTracker captures what had to be alive for the SUs
  // to execute, and what is still alive at the end.
  for (SUnit* SU : ScheduledSUnits) {
    RPTracker.setPos(SU->getInstr());
    RPTracker.advance();
  }

  // Close the RPTracker to finalize live ins/outs.
  RPTracker.closeRegion();

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Do not track physical registers, because it messes up.
  for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
    if (RegMaskPair.RegUnit.isVirtual())
      LiveInRegs.insert(RegMaskPair.RegUnit);
  }
  LiveOutRegs.clear();

  // We want LiveOutRegs to contain only the registers whose content was
  // written inside this block and is still read after the block (the block's
  // true outputs). The RPTracker's LiveOutRegs also contains registers that
  // merely pass through the block; requiring a definition between the first
  // and the last instruction of the block (isDefBetween) filters those out.
  for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
    Register Reg = RegMaskPair.RegUnit;
    if (Reg.isVirtual() &&
        isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
                     LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
                     LIS)) {
      LiveOutRegs.insert(Reg);
    }
  }

  // Pressure = sum of alive register sizes. Internally llvm represents some
  // registers as big 128 bit registers, for example, but they actually
  // correspond to 4 actual 32 bit registers, thus Pressure is not equal to
  // num_alive_registers * constant.
  LiveInPressure = TopPressure.MaxSetPressure;
  LiveOutPressure = BotPressure.MaxSetPressure;

  // Prepares TopRPTracker for top down scheduling.
  TopRPTracker.closeTop();
}
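// Informal summary: after initRegPressure(), LiveInRegs holds the virtual
// registers the block consumes from outside, and LiveOutRegs holds only the
// registers written here and still read by a later block. The block scheduler
// later uses these sets to estimate how picking a block changes VGPR/SGPR
// pressure.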

void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
                               MachineBasicBlock::iterator EndBlock) {
  if (!Scheduled)
    fastSchedule();

  // PreScheduling phase to set LiveIn and LiveOut.
  initRegPressure(BeginBlock, EndBlock);
  undoSchedule();

  // Schedule for real now.

  TopReadySUs.clear();

  for (SUnit* SU : SUnits) {
    if (!SU->NumPredsLeft)
      TopReadySUs.push_back(SU);
  }

  while (!TopReadySUs.empty()) {
    SUnit *SU = pickNode();
    ScheduledSUnits.push_back(SU);
    TopRPTracker.setPos(SU->getInstr());
    TopRPTracker.advance();
    nodeScheduled(SU);
  }

  // TODO: compute InternalAdditionalPressure.
  InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());

  // Check everything is right.
#ifndef NDEBUG
  assert(SUnits.size() == ScheduledSUnits.size() &&
         TopReadySUs.empty());
  for (SUnit* SU : SUnits) {
    assert(SU->isScheduled &&
           SU->NumPredsLeft == 0);
  }
#endif

  Scheduled = true;
}

void SIScheduleBlock::undoSchedule() {
  for (SUnit* SU : SUnits) {
    SU->isScheduled = false;
    for (SDep& Succ : SU->Succs) {
      if (BC->isSUInBlock(Succ.getSUnit(), ID))
        undoReleaseSucc(SU, &Succ);
    }
  }
  HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
  ScheduledSUnits.clear();
  Scheduled = false;
}

void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    ++SuccSU->WeakPredsLeft;
    return;
  }
  ++SuccSU->NumPredsLeft;
}

void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    DAG->dumpNode(*SuccSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif

  --SuccSU->NumPredsLeft;
}

/// Release Successors.
void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
  for (SDep& Succ : SU->Succs) {
    SUnit *SuccSU = Succ.getSUnit();

    if (SuccSU->NodeNum >= DAG->SUnits.size())
      continue;

    if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
      continue;

    releaseSucc(SU, &Succ);
    if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
      TopReadySUs.push_back(SuccSU);
  }
}

void SIScheduleBlock::nodeScheduled(SUnit *SU) {
  // Is in TopReadySUs.
  assert(!SU->NumPredsLeft);
  std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
  if (I == TopReadySUs.end()) {
    dbgs() << "Data Structure Bug in SI Scheduler\n";
    llvm_unreachable(nullptr);
  }
  TopReadySUs.erase(I);

  releaseSuccessors(SU, true);
  // Scheduling this node will trigger a wait,
  // thus propagate to other instructions that they do not need to wait either.
  if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
    HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);

  if (DAG->IsLowLatencySU[SU->NodeNum]) {
    for (SDep& Succ : SU->Succs) {
      std::map<unsigned, unsigned>::iterator I =
        NodeNum2Index.find(Succ.getSUnit()->NodeNum);
      if (I != NodeNum2Index.end())
        HasLowLatencyNonWaitedParent[I->second] = 1;
    }
  }
  SU->isScheduled = true;
}

void SIScheduleBlock::finalizeUnits() {
  // We remove links from outside blocks to enable scheduling inside the block.
  for (SUnit* SU : SUnits) {
    releaseSuccessors(SU, false);
    if (DAG->IsHighLatencySU[SU->NodeNum])
      HighLatencyBlock = true;
  }
  HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
}

// We maintain ascending order of IDs.
void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
  unsigned PredID = Pred->getID();

  // Check if not already predecessor.
  for (SIScheduleBlock* P : Preds) {
    if (PredID == P->getID())
      return;
  }
  Preds.push_back(Pred);

  assert(none_of(Succs,
                 [=](std::pair<SIScheduleBlock*,
                               SIScheduleBlockLinkKind> S) {
                   return PredID == S.first->getID();
                 }) &&
         "Loop in the Block Graph!");
}

void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
                              SIScheduleBlockLinkKind Kind) {
  unsigned SuccID = Succ->getID();

  // Check if not already successor.
  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
    if (SuccID == S.first->getID()) {
      if (S.second == SIScheduleBlockLinkKind::NoData &&
          Kind == SIScheduleBlockLinkKind::Data)
        S.second = Kind;
      return;
    }
  }
  if (Succ->isHighLatencyBlock())
    ++NumHighLatencySuccessors;
  Succs.emplace_back(Succ, Kind);

  assert(none_of(Preds,
                 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
         "Loop in the Block Graph!");
}

#ifndef NDEBUG
void SIScheduleBlock::printDebug(bool full) {
  dbgs() << "Block (" << ID << ")\n";
  if (!full)
    return;

  dbgs() << "\nContains High Latency Instruction: "
         << HighLatencyBlock << '\n';
  dbgs() << "\nDepends On:\n";
  for (SIScheduleBlock* P : Preds) {
    P->printDebug(false);
  }

  dbgs() << "\nSuccessors:\n";
  for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
    if (S.second == SIScheduleBlockLinkKind::Data)
      dbgs() << "(Data Dep) ";
    S.first->printDebug(false);
  }

  if (Scheduled) {
    dbgs() << "LiveInPressure "
           << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
           << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
    dbgs() << "LiveOutPressure "
           << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
           << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
    dbgs() << "LiveIns:\n";
    for (unsigned Reg : LiveInRegs)
      dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';

    dbgs() << "\nLiveOuts:\n";
    for (unsigned Reg : LiveOutRegs)
      dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
  }

  dbgs() << "\nInstructions:\n";
  for (const SUnit* SU : SUnits)
    DAG->dumpNode(*SU);

  dbgs() << "///////////////////////\n";
}
#endif

// SIScheduleBlockCreator //

SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
    : DAG(DAG) {}

SIScheduleBlocks
SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
  std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
      Blocks.find(BlockVariant);
  if (B == Blocks.end()) {
    SIScheduleBlocks Res;
    createBlocksForVariant(BlockVariant);
    topologicalSort();
    scheduleInsideBlocks();
    fillStats();
    Res.Blocks = CurrentBlocks;
    Res.TopDownIndex2Block = TopDownIndex2Block;
    Res.TopDownBlock2Index = TopDownBlock2Index;
    Blocks[BlockVariant] = Res;
    return Res;
  }
  return B->second;
}

bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
  if (SU->NodeNum >= DAG->SUnits.size())
    return false;
  return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
}

void SIScheduleBlockCreator::colorHighLatenciesAlone() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    if (DAG->IsHighLatencySU[SU->NodeNum]) {
      CurrentColoring[SU->NodeNum] = NextReservedID++;
    }
  }
}

static bool
hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
  for (const auto &PredDep : SU.Preds) {
    if (PredDep.getSUnit() == &FromSU &&
        PredDep.getKind() == llvm::SDep::Data)
      return true;
  }
  return false;
}

void SIScheduleBlockCreator::colorHighLatenciesGroups() {
  unsigned DAGSize = DAG->SUnits.size();
  unsigned NumHighLatencies = 0;
  unsigned GroupSize;
  int Color = NextReservedID;
  unsigned Count = 0;
  std::set<unsigned> FormingGroup;

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    if (DAG->IsHighLatencySU[SU->NodeNum])
      ++NumHighLatencies;
  }

  if (NumHighLatencies == 0)
    return;

  if (NumHighLatencies <= 6)
    GroupSize = 2;
  else if (NumHighLatencies <= 12)
    GroupSize = 3;
  else
    GroupSize = 4;

  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    const SUnit &SU = DAG->SUnits[SUNum];
    if (DAG->IsHighLatencySU[SU.NodeNum]) {
      unsigned CompatibleGroup = true;
      int ProposedColor = Color;
      std::vector<int> AdditionalElements;

      // We do not want two high latency instructions that depend on each
      // other in the same block: check that SU does not (transitively) depend
      // on a member of the forming group, and that joining the group does not
      // pull in instructions colored for another block or carrying a data
      // dependency on a group member.
      for (unsigned j : FormingGroup) {
        bool HasSubGraph;
        std::vector<int> SubGraph;
        // By construction (topological order), if SU and DAG->SUnits[j] are
        // linked, DAG->SUnits[j] is necessarily in the parent graph of SU.
#ifndef NDEBUG
        SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
                                               HasSubGraph);
        assert(!HasSubGraph);
#endif
        SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
                                               HasSubGraph);
        if (!HasSubGraph)
          continue; // No dependencies between each other.
        if (SubGraph.size() > 5) {
          // Too many elements would need to be added to the block.
          CompatibleGroup = false;
          break;
        }
        // Check the type of dependency.
        for (unsigned k : SubGraph) {
          // If the path joining the two instructions contains another high
          // latency instruction, or instructions colored for another block,
          // abort the merge.
          if (DAG->IsHighLatencySU[k] || (CurrentColoring[k] != ProposedColor &&
                                          CurrentColoring[k] != 0)) {
            CompatibleGroup = false;
            break;
          }
          // Same if the path carries a data dependency on the group member.
          if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
            CompatibleGroup = false;
            break;
          }
        }
        if (!CompatibleGroup)
          break;
        // Same check for SU itself.
        if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
          CompatibleGroup = false;
          break;
        }
        // The instructions of SubGraph must live in the same block, since they
        // are (order) required by the high latency instruction we add.
        llvm::append_range(AdditionalElements, SubGraph);
      }
      if (CompatibleGroup) {
        FormingGroup.insert(SU.NodeNum);
        for (unsigned j : AdditionalElements)
          CurrentColoring[j] = ProposedColor;
        CurrentColoring[SU.NodeNum] = ProposedColor;
        ++Count;
      }
      // Found an incompatible instruction, or the group is big enough:
      // start a new one.
      if (!CompatibleGroup) {
        FormingGroup.clear();
        Color = ++NextReservedID;
        ProposedColor = Color;
        FormingGroup.insert(SU.NodeNum);
        CurrentColoring[SU.NodeNum] = ProposedColor;
        Count = 0;
      } else if (Count == GroupSize) {
        FormingGroup.clear();
        Color = ++NextReservedID;
        ProposedColor = Color;
        Count = 0;
      }
    }
  }
}
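// Informal summary of the grouping heuristic above: high latency (memory)
// instructions are clustered into groups of 2 to 4 depending on how many there
// are, so their latencies can be hidden together, but only when joining a
// group does not drag in another high latency instruction, instructions
// already colored for another block, or a real data dependency on a group
// member.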

void SIScheduleBlockCreator::colorComputeReservedDependencies() {
  unsigned DAGSize = DAG->SUnits.size();
  std::map<std::set<unsigned>, unsigned> ColorCombinations;

  CurrentTopDownReservedDependencyColoring.clear();
  CurrentBottomUpReservedDependencyColoring.clear();

  CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
  CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);

  // Traverse TopDown, and give different colors to SUs depending
  // on which combination of high latencies they depend on.
  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given.
    if (CurrentColoring[SU->NodeNum]) {
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
        continue;
      if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
        SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
    }
    // Color 0 by default.
    if (SUColors.empty())
      continue;
    // Same color as the parent.
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }

  ColorCombinations.clear();

  // Same as before, but BottomUp.
  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given.
    if (CurrentColoring[SU->NodeNum]) {
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
        SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
    }
    // Keep color 0.
    if (SUColors.empty())
      continue;
    // Same color as the child.
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }
}

void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
  std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;

  // Every combination of colors given by the top down
  // and bottom up reserved node dependencies gets its own color.
  for (SUnit &SU : DAG->SUnits) {
    std::pair<unsigned, unsigned> SUColors;

    // High latency instructions: already given.
    if (CurrentColoring[SU.NodeNum])
      continue;

    SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];
    SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];

    auto [Pos, Inserted] =
        ColorCombinations.try_emplace(SUColors, NextNonReservedID);
    CurrentColoring[SU.NodeNum] = Pos->second;
    if (Inserted)
      NextNonReservedID++;
  }
}

void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
  unsigned DAGSize = DAG->SUnits.size();
  std::vector<int> PendingColoring = CurrentColoring;

  assert(DAGSize >= 1 &&
         CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
         CurrentTopDownReservedDependencyColoring.size() == DAGSize);
  // If there is no reserved block at all, do nothing.
  if (*llvm::max_element(CurrentBottomUpReservedDependencyColoring) == 0 &&
      *llvm::max_element(CurrentTopDownReservedDependencyColoring) == 0)
    return;

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;
    std::set<unsigned> SUColorsPending;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
          CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
        SUColors.insert(CurrentColoring[Succ->NodeNum]);
      SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
    }
    // If there is only one child/parent block, and that block is not among the
    // ones being recolored in this pass, merge the instruction into it.
    if (SUColors.size() == 1 && SUColorsPending.size() == 1)
      PendingColoring[SU->NodeNum] = *SUColors.begin();
    else // TODO: Attribute new colors depending on the combination of
         // available colors.
      PendingColoring[SU->NodeNum] = NextNonReservedID++;
  }
  CurrentColoring = PendingColoring;
}

void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
  unsigned DAGSize = DAG->SUnits.size();
  unsigned PreviousColor;
  std::set<unsigned> SeenColors;

  if (DAGSize <= 1)
    return;

  PreviousColor = CurrentColoring[0];

  for (unsigned i = 1, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    unsigned CurrentColor = CurrentColoring[i];
    unsigned PreviousColorSave = PreviousColor;
    assert(i == SU->NodeNum);

    if (CurrentColor != PreviousColor)
      SeenColors.insert(PreviousColor);
    PreviousColor = CurrentColor;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    if (SeenColors.find(CurrentColor) == SeenColors.end())
      continue;

    if (PreviousColorSave != CurrentColor)
      CurrentColoring[i] = NextNonReservedID++;
    else
      CurrentColoring[i] = CurrentColoring[i-1];
  }
}

void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    // No predecessor: Vgpr constant loading.
    // Low latency instructions usually have a predecessor (the address).
    if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1)
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
  }
}

void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1)
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
  }
}

void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
  unsigned DAGSize = DAG->SUnits.size();

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
  }
}

void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
  unsigned DAGSize = DAG->SUnits.size();
  std::map<unsigned, unsigned> ColorCount;

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    unsigned color = CurrentColoring[SU->NodeNum];
    ++ColorCount[color];
  }

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    unsigned color = CurrentColoring[SU->NodeNum];
    std::set<unsigned> SUColors;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    if (ColorCount[color] > 1)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    }
    if (SUColors.size() == 1 && *SUColors.begin() != color) {
      --ColorCount[color];
      CurrentColoring[SU->NodeNum] = *SUColors.begin();
      ++ColorCount[*SUColors.begin()];
    }
  }
}

void SIScheduleBlockCreator::cutHugeBlocks() {
  // TODO
}

void SIScheduleBlockCreator::regroupNoUserInstructions() {
  unsigned DAGSize = DAG->SUnits.size();
  int GroupID = NextNonReservedID++;

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    bool hasSuccessor = false;

    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      hasSuccessor = true;
    }
    if (!hasSuccessor)
      CurrentColoring[SU->NodeNum] = GroupID;
  }
}

void SIScheduleBlockCreator::colorExports() {
  unsigned ExportColor = NextNonReservedID++;
  SmallVector<unsigned, 8> ExpGroup;

  // Put all exports together in one block. The block naturally ends up being
  // scheduled last, which is better for performance. However, for safety, the
  // exports must be groupable without any non-export instruction in between:
  // if a non-export depends on an export (for example when a spilled register
  // is reloaded into the same register a previous export used), do not
  // regroup the exports.
  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    const SUnit &SU = DAG->SUnits[SUNum];
    if (SIInstrInfo::isEXP(*SU.getInstr())) {
      // SU is an export instruction. Check whether one of its successor
      // dependencies is a non-export, in which case we skip export grouping.
      for (const SDep &SuccDep : SU.Succs) {
        const SUnit *SuccSU = SuccDep.getSUnit();
        if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) {
          // Ignore these dependencies.
          continue;
        }
        assert(SuccSU->isInstr() &&
               "SUnit unexpectedly not representing an instruction!");

        if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) {
          // A non-export depends on the export instruction.
          // Abort the export grouping.
          return;
        }
      }
      ExpGroup.push_back(SUNum);
    }
  }

  // The group can be formed. Give the color.
  for (unsigned j : ExpGroup)
    CurrentColoring[j] = ExportColor;
}

void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
  unsigned DAGSize = DAG->SUnits.size();
  std::map<unsigned,unsigned> RealID;

  CurrentBlocks.clear();
  CurrentColoring.clear();
  CurrentColoring.resize(DAGSize, 0);
  Node2CurrentBlock.clear();

  // Restore links that a previous scheduling variant has overridden.
  DAG->restoreSULinksLeft();

  NextReservedID = 1;
  NextNonReservedID = DAGSize + 1;

  if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
    colorHighLatenciesGroups();
  else
    colorHighLatenciesAlone();
  colorComputeReservedDependencies();
  colorAccordingToReservedDependencies();
  colorEndsAccordingToDependencies();
  if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
    colorForceConsecutiveOrderInGroup();
  regroupNoUserInstructions();
  colorMergeConstantLoadsNextGroup();
  colorMergeIfPossibleNextGroupOnlyForReserved();
  colorExports();

  // Put SUs of the same color into the same block.
  Node2CurrentBlock.resize(DAGSize, -1);
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    unsigned Color = CurrentColoring[SU->NodeNum];
    if (RealID.find(Color) == RealID.end()) {
      int ID = CurrentBlocks.size();
      BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
      CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
      RealID[Color] = ID;
    }
    CurrentBlocks[RealID[Color]]->addUnit(SU);
    Node2CurrentBlock[SU->NodeNum] = RealID[Color];
  }

  // Build dependencies between blocks.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    int SUID = Node2CurrentBlock[i];
    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (Node2CurrentBlock[Succ->NodeNum] != SUID)
        CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
                                     SuccDep.isCtrl() ? NoData : Data);
    }
    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
        continue;
      if (Node2CurrentBlock[Pred->NodeNum] != SUID)
        CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
    }
  }

  // Free roots and leafs of all blocks to enable scheduling inside them.
  for (SIScheduleBlock *Block : CurrentBlocks)
    Block->finalizeUnits();
  LLVM_DEBUG({
    dbgs() << "Blocks created:\n\n";
    for (SIScheduleBlock *Block : CurrentBlocks)
      Block->printDebug(true);
  });
}
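// Informal summary: block creation is a sequence of "coloring" passes over the
// SUnits. Reserved colors (values <= DAGSize) come from the high latency
// grouping; the remaining passes propagate and merge colors along
// dependencies, and every final color becomes one SIScheduleBlock, with
// inter-block edges added for dependencies that cross colors.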

// If the iterator points at a debug instruction, advance until reaching End
// or a non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugInstr())
      break;
  }
  return I;
}

// Non-const version, so the result can be assigned back to a mutable
// iterator below.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
      .getNonConstIterator();
}

void SIScheduleBlockCreator::topologicalSort() {
  unsigned DAGSize = CurrentBlocks.size();
  std::vector<int> WorkList;

  WorkList.reserve(DAGSize);
  TopDownIndex2Block.resize(DAGSize);
  TopDownBlock2Index.resize(DAGSize);
  BottomUpIndex2Block.resize(DAGSize);

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    unsigned Degree = Block->getSuccs().size();
    TopDownBlock2Index[i] = Degree;
    if (Degree == 0) {
      WorkList.push_back(i);
    }
  }

  int Id = DAGSize;
  while (!WorkList.empty()) {
    int i = WorkList.back();
    SIScheduleBlock *Block = CurrentBlocks[i];
    WorkList.pop_back();
    TopDownBlock2Index[i] = --Id;
    TopDownIndex2Block[Id] = i;
    for (SIScheduleBlock* Pred : Block->getPreds()) {
      if (!--TopDownBlock2Index[Pred->getID()])
        WorkList.push_back(Pred->getID());
    }
  }

#ifndef NDEBUG
  // Check correctness of the ordering.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    for (SIScheduleBlock* Pred : Block->getPreds()) {
      assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
             "Wrong Top Down topological sorting");
    }
  }
#endif

  BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
                                         TopDownIndex2Block.rend());
}

void SIScheduleBlockCreator::scheduleInsideBlocks() {
  unsigned DAGSize = CurrentBlocks.size();

  // First do a fast scheduling pass so that every block corresponds to a
  // contiguous range of instructions.
  LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    Block->fastSchedule();
  }

  // Note: the following code, together with the part restoring the previous
  // positions, is by far the most expensive operation of the scheduler.

  // Do not update CurrentTop.
  MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
  std::vector<MachineBasicBlock::iterator> PosOld;
  std::vector<MachineBasicBlock::iterator> PosNew;
  PosOld.reserve(DAG->SUnits.size());
  PosNew.reserve(DAG->SUnits.size());

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    int BlockIndice = TopDownIndex2Block[i];
    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
    std::vector<SUnit*> SUs = Block->getScheduledUnits();

    for (SUnit* SU : SUs) {
      MachineInstr *MI = SU->getInstr();
      MachineBasicBlock::iterator Pos = MI;
      PosOld.push_back(Pos);
      if (&*CurrentTopFastSched == MI) {
        PosNew.push_back(Pos);
        CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
                                          DAG->getCurrentBottom());
      } else {
        // Update the instruction stream.
        DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);

        // Update LiveIntervals.
        // Note: moving all instructions and calling handleMove every time is
        // the most cpu intensive operation of the scheduler; it would help a
        // lot if LiveIntervals for the whole region could be recomputed once.
        DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
        PosNew.push_back(CurrentTopFastSched);
      }
    }
  }

  // Now every block of SUs is a contiguous block of MIs.
  // Do the final schedule for the instructions inside each block; the
  // contiguity property is what makes the reg usage tracking correct.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    std::vector<SUnit*> SUs = Block->getScheduledUnits();
    Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
  }

  // Restore the old ordering (the final order is emitted later).
  for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
    MachineBasicBlock::iterator POld = PosOld[i-1];
    MachineBasicBlock::iterator PNew = PosNew[i-1];
    if (PNew != POld) {
      // If the position did not change, do nothing.
      DAG->getBB()->splice(POld, DAG->getBB(), PNew);
      DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
    }
  }

  LLVM_DEBUG({
    for (SIScheduleBlock *Block : CurrentBlocks)
      Block->printDebug(true);
  });
}
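// Informal note: instructions are physically spliced so that each block is a
// contiguous range of MIs (which keeps LiveIntervals and the register
// pressure trackers consistent while scheduling inside the block), and the
// original order is then restored; the final order is only emitted later by
// SIScheduleDAGMI::schedule().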

void SIScheduleBlockCreator::fillStats() {
  unsigned DAGSize = CurrentBlocks.size();

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    int BlockIndice = TopDownIndex2Block[i];
    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
    if (Block->getPreds().empty())
      Block->Depth = 0;
    else {
      unsigned Depth = 0;
      for (SIScheduleBlock *Pred : Block->getPreds()) {
        if (Depth < Pred->Depth + Pred->getCost())
          Depth = Pred->Depth + Pred->getCost();
      }
      Block->Depth = Depth;
    }
  }

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    int BlockIndice = BottomUpIndex2Block[i];
    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
    if (Block->getSuccs().empty())
      Block->Height = 0;
    else {
      unsigned Height = 0;
      for (const auto &Succ : Block->getSuccs())
        Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
      Block->Height = Height;
    }
  }
}

// SIScheduleBlockScheduler //

SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
                                                   SISchedulerBlockSchedulerVariant Variant,
                                                   SIScheduleBlocks BlocksStruct) :
  DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
  LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
  SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {

  // Fill the usage of every output.
  // Warning: while by construction there is always a link between two blocks
  // when one needs a result of the other, the register coalescer may have
  // given the same virtual register to several values, so the producer of a
  // register is taken to be the predecessor with the highest topological
  // index that outputs it.
  LiveOutRegsNumUsages.resize(Blocks.size());
  for (SIScheduleBlock *Block : Blocks) {
    for (unsigned Reg : Block->getInRegs()) {
      bool Found = false;
      int topoInd = -1;
      for (SIScheduleBlock* Pred : Block->getPreds()) {
        std::set<unsigned> PredOutRegs = Pred->getOutRegs();
        std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

        if (RegPos != PredOutRegs.end()) {
          Found = true;
          if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
            topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
          }
        }
      }

      if (!Found)
        continue;

      int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
      ++LiveOutRegsNumUsages[PredID][Reg];
    }
  }

  LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
  BlockNumPredsLeft.resize(Blocks.size());
  BlockNumSuccsLeft.resize(Blocks.size());

  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    BlockNumPredsLeft[i] = Block->getPreds().size();
    BlockNumSuccsLeft[i] = Block->getSuccs().size();
  }

#ifndef NDEBUG
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    assert(Block->getID() == i);
  }
#endif

  std::set<unsigned> InRegs = DAG->getInRegs();
  addLiveRegs(InRegs);

  // Increase LiveOutRegsNumUsages for blocks producing registers consumed in
  // another scheduling region.
  for (unsigned Reg : DAG->getOutRegs()) {
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      // Do a reverse traversal to get the last block where the register is
      // produced.
      int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
      SIScheduleBlock *Block = Blocks[ID];
      const std::set<unsigned> &OutRegs = Block->getOutRegs();

      if (OutRegs.find(Reg) == OutRegs.end())
        continue;

      ++LiveOutRegsNumUsages[ID][Reg];
      break;
    }
  }

  // Fill LiveRegsConsumers for registers that were already defined before
  // this scheduling region.
  for (SIScheduleBlock *Block : Blocks) {
    for (unsigned Reg : Block->getInRegs()) {
      bool Found = false;
      for (SIScheduleBlock* Pred : Block->getPreds()) {
        std::set<unsigned> PredOutRegs = Pred->getOutRegs();
        std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

        if (RegPos != PredOutRegs.end()) {
          Found = true;
          break;
        }
      }

      if (!Found)
        ++LiveRegsConsumers[Reg];
    }
  }

  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    if (BlockNumPredsLeft[i] == 0) {
      ReadyBlocks.push_back(Block);
    }
  }

  while (SIScheduleBlock *Block = pickBlock()) {
    BlocksScheduled.push_back(Block);
    blockScheduled(Block);
  }

  LLVM_DEBUG({
    dbgs() << "Block Order:";
    for (SIScheduleBlock *Block : BlocksScheduled) {
      dbgs() << ' ' << Block->getID();
    }
    dbgs() << '\n';
  });
}
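// Informal note: LiveOutRegsNumUsages[BlockID][Reg] counts how many later
// consumers a register produced by BlockID has (including uses outside the
// region), and LiveRegsConsumers tracks the remaining consumers while blocks
// are scheduled, so a register stops being counted as live once its last
// consumer block has been scheduled.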

bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
                                                   SIBlockSchedCandidate &TryCand) {
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Try to hide high latencies.
  if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
                       Cand.LastPosHighLatParentScheduled,
                       TryCand, Cand, Latency))
    return true;
  // Schedule high latencies early so they can be hidden better.
  if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
                          TryCand, Cand, Latency))
    return true;
  if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
                                                   TryCand, Cand, Depth))
    return true;
  if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
                          Cand.NumHighLatencySuccessors,
                          TryCand, Cand, Successor))
    return true;
  return false;
}

bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
                                                    SIBlockSchedCandidate &TryCand) {
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
                       TryCand, Cand, RegUsage))
    return true;
  if (SISched::tryGreater(TryCand.NumSuccessors > 0,
                          Cand.NumSuccessors > 0,
                          TryCand, Cand, Successor))
    return true;
  if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Latency))
    return true;
  if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
                       TryCand, Cand, RegUsage))
    return true;
  return false;
}

SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
  SIBlockSchedCandidate Cand;
  std::vector<SIScheduleBlock*>::iterator Best;
  SIScheduleBlock *Block;
  if (ReadyBlocks.empty())
    return nullptr;

  DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
                        VregCurrentUsage, SregCurrentUsage);
  if (VregCurrentUsage > maxVregUsage)
    maxVregUsage = VregCurrentUsage;
  if (SregCurrentUsage > maxSregUsage)
    maxSregUsage = SregCurrentUsage;
  LLVM_DEBUG({
    dbgs() << "Picking New Blocks\n";
    dbgs() << "Available: ";
    for (SIScheduleBlock *Block : ReadyBlocks)
      dbgs() << Block->getID() << ' ';
    dbgs() << "\nCurrent Live:\n";
    for (unsigned Reg : LiveRegs)
      dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
    dbgs() << '\n';
    dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
    dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
  });

  Cand.Block = nullptr;
  for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
       E = ReadyBlocks.end(); I != E; ++I) {
    SIBlockSchedCandidate TryCand;
    TryCand.Block = *I;
    TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
    TryCand.VGPRUsageDiff =
      checkRegUsageImpact(TryCand.Block->getInRegs(),
                          TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
    TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
    TryCand.NumHighLatencySuccessors =
      TryCand.Block->getNumHighLatencySuccessors();
    TryCand.LastPosHighLatParentScheduled =
      (unsigned int) std::max (0,
         LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
           LastPosWaitedHighLatency);
    TryCand.Height = TryCand.Block->Height;
    // Try not to increase VGPR usage too much, else we may spill.
    if (VregCurrentUsage > 120 ||
        Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
      if (!tryCandidateRegUsage(Cand, TryCand) &&
          Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
        tryCandidateLatency(Cand, TryCand);
    } else {
      if (!tryCandidateLatency(Cand, TryCand))
        tryCandidateRegUsage(Cand, TryCand);
    }
    if (TryCand.Reason != NoCand) {
      Cand.setBest(TryCand);
      Best = I;
      LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
                        << getReasonStr(Cand.Reason) << '\n');
    }
  }

  LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';
             dbgs() << "Is a block with high latency instruction: "
                    << (Cand.IsHighLatency ? "yes\n" : "no\n");
             dbgs() << "Position of last high latency dependency: "
                    << Cand.LastPosHighLatParentScheduled << '\n';
             dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
             dbgs() << '\n';);

  Block = Cand.Block;
  ReadyBlocks.erase(Best);
  return Block;
}
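// Informal note on the policy above: while estimated VGPR usage stays at or
// below ~120 (and the variant allows it), latency criteria are tried first
// and register usage only breaks ties; past that point the order flips and
// blocks that reduce (or at least do not increase) VGPR usage are preferred.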

// Tracking of currently alive registers to determine VGPR usage.

void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
  for (Register Reg : Regs) {
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    // If not already in the live set, then add it.
    (void) LiveRegs.insert(Reg);
  }
}

void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
                                                std::set<unsigned> &Regs) {
  for (unsigned Reg : Regs) {
    // For now only track virtual registers.
    std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
    assert (Pos != LiveRegs.end() && // Reg must be live.
            LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
            LiveRegsConsumers[Reg] >= 1);
    --LiveRegsConsumers[Reg];
    if (LiveRegsConsumers[Reg] == 0)
      LiveRegs.erase(Pos);
  }
}

void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
  for (const auto &Block : Parent->getSuccs()) {
    if (--BlockNumPredsLeft[Block.first->getID()] == 0)
      ReadyBlocks.push_back(Block.first);

    if (Parent->isHighLatencyBlock() &&
        Block.second == SIScheduleBlockLinkKind::Data)
      LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
  }
}

void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
  decreaseLiveRegs(Block, Block->getInRegs());
  addLiveRegs(Block->getOutRegs());
  releaseBlockSuccs(Block);
  for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {
    // We produce this register, thus it must not be previously alive.
    assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
           LiveRegsConsumers[RegP.first] == 0);
    LiveRegsConsumers[RegP.first] += RegP.second;
  }
  if (LastPosHighLatencyParentScheduled[Block->getID()] >
        (unsigned)LastPosWaitedHighLatency)
    LastPosWaitedHighLatency =
      LastPosHighLatencyParentScheduled[Block->getID()];
  ++NumBlockScheduled;
}

std::vector<int>
SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
                                              std::set<unsigned> &OutRegs) {
  std::vector<int> DiffSetPressure;
  DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);

  for (Register Reg : InRegs) {
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    if (LiveRegsConsumers[Reg] > 1)
      continue;
    PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
    for (; PSetI.isValid(); ++PSetI) {
      DiffSetPressure[*PSetI] -= PSetI.getWeight();
    }
  }

  for (Register Reg : OutRegs) {
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
    for (; PSetI.isValid(); ++PSetI) {
      DiffSetPressure[*PSetI] += PSetI.getWeight();
    }
  }

  return DiffSetPressure;
}
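// Informal note: the returned vector is indexed by register pressure set, and
// the entries are signed deltas (inputs whose last consumer this block is
// decrease pressure, outputs increase it), using the same per-register weights
// the generic RegPressureTracker uses.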

// SIScheduler //

SIScheduleBlockResult
SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
                             SISchedulerBlockSchedulerVariant ScheduleVariant) {
  SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
  SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
  std::vector<SIScheduleBlock*> ScheduledBlocks;
  SIScheduleBlockResult Res;

  ScheduledBlocks = Scheduler.getBlocks();

  for (SIScheduleBlock *Block : ScheduledBlocks) {
    std::vector<SUnit*> SUs = Block->getScheduledUnits();

    for (SUnit* SU : SUs)
      Res.SUs.push_back(SU->NodeNum);
  }

  Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
  Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
  return Res;
}

// SIScheduleDAGMI //

SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
  ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
  SITII = static_cast<const SIInstrInfo*>(TII);
  SITRI = static_cast<const SIRegisterInfo*>(TRI);
}

SIScheduleDAGMI::~SIScheduleDAGMI() = default;

// Code adapted from scheduleDAG.cpp.
// Does a topological sort over the SUs, both top down and bottom up.
void SIScheduleDAGMI::topologicalSort() {
  Topo.InitDAGTopologicalSorting();

  TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
  BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
}

// Move low latency instructions further from their users without increasing
// SGPR usage (in general). This is to be replaced by a better pass that would
// take into account SGPR usage (based on VGPR usage and the corresponding
// wavefront count), that would try to merge groups of loads if it makes
// sense, etc.
void SIScheduleDAGMI::moveLowLatencies() {
  unsigned DAGSize = SUnits.size();
  int LastLowLatencyUser = -1;
  int LastLowLatencyPos = -1;

  for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[ScheduledSUnits[i]];
    bool IsLowLatencyUser = false;
    unsigned MinPos = 0;

    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (IsLowLatencySU[Pred->NodeNum]) {
        IsLowLatencyUser = true;
      }
      if (Pred->NodeNum >= DAGSize)
        continue;
      unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
      if (PredPos >= MinPos)
        MinPos = PredPos + 1;
    }

    if (IsLowLatencySU[SU->NodeNum]) {
      unsigned BestPos = LastLowLatencyUser + 1;
      if ((int)BestPos <= LastLowLatencyPos)
        BestPos = LastLowLatencyPos + 1;
      if (BestPos < MinPos)
        BestPos = MinPos;
      if (BestPos < i) {
        for (unsigned u = i; u > BestPos; --u) {
          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
          ScheduledSUnits[u] = ScheduledSUnits[u-1];
        }
        ScheduledSUnits[BestPos] = SU->NodeNum;
        ScheduledSUnitsInv[SU->NodeNum] = BestPos;
      }
      LastLowLatencyPos = BestPos;
      if (IsLowLatencyUser)
        LastLowLatencyUser = BestPos;
    } else if (IsLowLatencyUser) {
      LastLowLatencyUser = i;
    // Moves COPY instructions on which the low latency instructions depend
    // too.
    } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
      bool CopyForLowLat = false;
      for (SDep& SuccDep : SU->Succs) {
        SUnit *Succ = SuccDep.getSUnit();
        if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
          continue;
        if (IsLowLatencySU[Succ->NodeNum]) {
          CopyForLowLat = true;
        }
      }
      if (!CopyForLowLat)
        continue;
      if (MinPos < i) {
        for (unsigned u = i; u > MinPos; --u) {
          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
          ScheduledSUnits[u] = ScheduledSUnits[u-1];
        }
        ScheduledSUnits[MinPos] = SU->NodeNum;
        ScheduledSUnitsInv[SU->NodeNum] = MinPos;
      }
    }
  }
}
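// Informal note: this post-pass only permutes the already-chosen order. Low
// latency loads are hoisted as close to the top as their operands (and
// previously placed low latency instructions) allow, and COPYs feeding them
// are hoisted too, so the waits on their results land further away from the
// loads.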

void SIScheduleDAGMI::restoreSULinksLeft() {
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnits[i].isScheduled = false;
    SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
    SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
    SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
    SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
  }
}

// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
template<typename _Iterator> void
SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
                                  unsigned &VgprUsage, unsigned &SgprUsage) {
  VgprUsage = 0;
  SgprUsage = 0;
  for (_Iterator RegI = First; RegI != End; ++RegI) {
    Register Reg = *RegI;
    // For now only track virtual registers.
    if (!Reg.isVirtual())
      continue;
    PSetIterator PSetI = MRI.getPressureSets(Reg);
    for (; PSetI.isValid(); ++PSetI) {
      if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
        VgprUsage += PSetI.getWeight();
      else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
        SgprUsage += PSetI.getWeight();
    }
  }
}

void SIScheduleDAGMI::schedule()
{
  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  SIScheduleBlockResult Best, Temp;
  LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");

  buildDAGWithRegPressure();
  postProcessDAG();

  LLVM_DEBUG(dump());
  if (PrintDAGs)
    dump();
  if (ViewMISchedDAGs)
    viewGraph();

  topologicalSort();
  findRootsAndBiasEdges(TopRoots, BotRoots);
  // We reuse several ScheduleDAGMILive functions (queues, MI placement,
  // debug value handling), but the block-based scheduling itself is done by
  // SIScheduler below.
  initQueues(TopRoots, BotRoots);

  // Fill some stats to help scheduling.

  SUnitsLinksBackup = SUnits;
  IsLowLatencySU.clear();
  LowLatencyOffset.clear();
  IsHighLatencySU.clear();

  IsLowLatencySU.resize(SUnits.size(), 0);
  LowLatencyOffset.resize(SUnits.size(), 0);
  IsHighLatencySU.resize(SUnits.size(), 0);

  for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    const MachineOperand *BaseLatOp;
    int64_t OffLatReg;
    if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
      IsLowLatencySU[i] = 1;
      bool OffsetIsScalable;
      if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
                                         OffsetIsScalable, TRI))
        LowLatencyOffset[i] = OffLatReg;
    } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
      IsHighLatencySU[i] = 1;
  }

  SIScheduler Scheduler(this);
  Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
                                   SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);

  // If VGPR usage is extremely high, try other good performing variants
  // which could lead to lower VGPR usage.
  if (Best.MaxVGPRUsage > 180) {
    static const std::pair<SISchedulerBlockCreatorVariant,
                           SISchedulerBlockSchedulerVariant>
        Variants[] = {
      { LatenciesAlone, BlockRegUsageLatency },
      { LatenciesGrouped, BlockLatencyRegUsage },
      { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage }
    };
    for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
      Temp = Scheduler.scheduleVariant(v.first, v.second);
      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
        Best = Temp;
    }
  }
  // If VGPR usage is still extremely high, we may spill. Try other variants
  // which are less performing, but that could lead to lower VGPR usage.
  if (Best.MaxVGPRUsage > 200) {
    static const std::pair<SISchedulerBlockCreatorVariant,
                           SISchedulerBlockSchedulerVariant>
        Variants[] = {
      { LatenciesAlone, BlockRegUsage },
      { LatenciesGrouped, BlockRegUsageLatency },
      { LatenciesGrouped, BlockRegUsage },
      { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
      { LatenciesAlonePlusConsecutive, BlockRegUsage }
    };
    for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
      Temp = Scheduler.scheduleVariant(v.first, v.second);
      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
        Best = Temp;
    }
  }

  ScheduledSUnits = Best.SUs;
  ScheduledSUnitsInv.resize(SUnits.size());

  for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
    ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
  }

  moveLowLatencies();

  // Tell the outside world about the result of the scheduling.

  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
  TopRPTracker.setPos(CurrentTop);

  for (unsigned I : ScheduledSUnits) {
    SUnit *SU = &SUnits[I];

    scheduleMI(SU, true);

    LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                      << *SU->getInstr());
  }

  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
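// Usage sketch (illustrative, not part of this file): the SI scheduler is made
// available to the MachineScheduler pass through a factory registered by the
// AMDGPU target (in AMDGPUTargetMachine.cpp). A minimal registration looks
// roughly like this; the exact names follow the upstream target code and may
// differ between releases:
//
//   static ScheduleDAGInstrs *
//   createSIMachineScheduler(MachineSchedContext *C) {
//     return new SIScheduleDAGMI(C);
//   }
//
//   static MachineSchedRegistry
//   SISchedRegistry("si-scheduler", "Run SI's custom scheduler",
//                   createSIMachineScheduler);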
