LLVM: lib/Target/AMDGPU/SIMachineScheduler.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

19

20using namespace llvm;

21

22#define DEBUG_TYPE "machine-scheduler"

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122#ifndef NDEBUG

123

125 switch (Reason) {

126 case NoCand: return "NOCAND";

127 case RegUsage: return "REGUSAGE";

128 case Latency: return "LATENCY";

129 case Successor: return "SUCCESSOR";

130 case Depth: return "DEPTH";

132 }

134}

135

136#endif

137

139static bool tryLess(int TryVal, int CandVal,

143 if (TryVal < CandVal) {

144 TryCand.Reason = Reason;

145 return true;

146 }

147 if (TryVal > CandVal) {

148 if (Cand.Reason > Reason)

149 Cand.Reason = Reason;

150 return true;

151 }

153 return false;

154}

155

160 if (TryVal > CandVal) {

161 TryCand.Reason = Reason;

162 return true;

163 }

164 if (TryVal < CandVal) {

165 if (Cand.Reason > Reason)

166 Cand.Reason = Reason;

167 return true;

168 }

170 return false;

171}

172}

173

174

175

177 NodeNum2Index[SU->NodeNum] = SUnits.size();

178 SUnits.push_back(SU);

179}

180

181#ifndef NDEBUG

182void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {

183

184 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);

185 dbgs() << '\n';

186}

187#endif

188

189void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,

190 SISchedCandidate &TryCand) {

191

192 if (!Cand.isValid()) {

194 return;

195 }

196

197 if (Cand.SGPRUsage > 60 &&

200 return;

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

218 Cand.HasLowLatencyNonWaitedParent,

220 return;

221

224 return;

225

226 if (TryCand.IsLowLatency &&

227 SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,

229 return;

230

233 return;

234

235

236 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {

238 }

239}

240

241SUnit* SIScheduleBlock::pickNode() {

242 SISchedCandidate TopCand;

243

244 for (SUnit* SU : TopReadySUs) {

245 SISchedCandidate TryCand;

246 std::vector pressure;

247 std::vector MaxPressure;

248

249 TryCand.SU = SU;

250 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);

251 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];

252 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];

253 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];

254 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];

255 TryCand.HasLowLatencyNonWaitedParent =

256 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];

257 tryCandidateTopDown(TopCand, TryCand);

258 if (TryCand.Reason != NoCand)

259 TopCand.setBest(TryCand);

260 }

261

262 return TopCand.SU;

263}

264

265

266

268 TopReadySUs.clear();

269 if (Scheduled)

270 undoSchedule();

271

272 for (SUnit* SU : SUnits) {

273 if (!SU->NumPredsLeft)

274 TopReadySUs.push_back(SU);

275 }

276

277 while (!TopReadySUs.empty()) {

278 SUnit *SU = TopReadySUs[0];

279 ScheduledSUnits.push_back(SU);

280 nodeScheduled(SU);

281 }

282

283 Scheduled = true;

284}

285

286

291 if (MI.isDebugValue())

292 continue;

294 if (InstSlot >= First && InstSlot <= Last)

295 return true;

296 }

297 return false;

298}

299

302 IntervalPressure Pressure, BotPressure;

303 RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);

304 LiveIntervals *LIS = DAG->getLIS();

305 MachineRegisterInfo *MRI = DAG->getMRI();

306 DAG->initRPTracker(TopRPTracker);

307 DAG->initRPTracker(BotRPTracker);

308 DAG->initRPTracker(RPTracker);

309

310

311

312 for (SUnit* SU : ScheduledSUnits) {

313 RPTracker.setPos(SU->getInstr());

314 RPTracker.advance();

315 }

316

317

318 RPTracker.closeRegion();

319

320

321 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);

322 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

323

324

325 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {

326 if (RegMaskPair.VRegOrUnit.isVirtualReg())

327 LiveInRegs.insert(RegMaskPair.VRegOrUnit.asVirtualReg());

328 }

329 LiveOutRegs.clear();

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {

353 VirtRegOrUnit VRegOrUnit = RegMaskPair.VRegOrUnit;

358 LIS)) {

359 LiveOutRegs.insert(VRegOrUnit.asVirtualReg());

360 }

361 }

362

363

364

365

366

367 LiveInPressure = TopPressure.MaxSetPressure;

369

370

371 TopRPTracker.closeTop();

372}

373

376 if (!Scheduled)

378

379

380 initRegPressure(BeginBlock, EndBlock);

381 undoSchedule();

382

383

384

385 TopReadySUs.clear();

386

387 for (SUnit* SU : SUnits) {

388 if (!SU->NumPredsLeft)

389 TopReadySUs.push_back(SU);

390 }

391

392 while (!TopReadySUs.empty()) {

393 SUnit *SU = pickNode();

394 ScheduledSUnits.push_back(SU);

395 TopRPTracker.setPos(SU->getInstr());

396 TopRPTracker.advance();

397 nodeScheduled(SU);

398 }

399

400

401 InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());

402

403

404#ifndef NDEBUG

405 assert(SUnits.size() == ScheduledSUnits.size() &&

406 TopReadySUs.empty());

407 for (SUnit* SU : SUnits) {

408 assert(SU->isScheduled &&

409 SU->NumPredsLeft == 0);

410 }

411#endif

412

413 Scheduled = true;

414}

415

416void SIScheduleBlock::undoSchedule() {

417 for (SUnit* SU : SUnits) {

418 SU->isScheduled = false;

419 for (SDep& Succ : SU->Succs) {

421 undoReleaseSucc(SU, &Succ);

422 }

423 }

424 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);

425 ScheduledSUnits.clear();

426 Scheduled = false;

427}

428

429void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {

430 SUnit *SuccSU = SuccEdge->getSUnit();

431

432 if (SuccEdge->isWeak()) {

434 return;

435 }

437}

438

439void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {

440 SUnit *SuccSU = SuccEdge->getSUnit();

441

442 if (SuccEdge->isWeak()) {

444 return;

445 }

446#ifndef NDEBUG

448 dbgs() << "*** Scheduling failed! ***\n";

449 DAG->dumpNode(*SuccSU);

450 dbgs() << " has been released too many times!\n";

452 }

453#endif

454

456}

457

458

459void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {

460 for (SDep& Succ : SU->Succs) {

461 SUnit *SuccSU = Succ.getSUnit();

462

463 if (SuccSU->NodeNum >= DAG->SUnits.size())

464 continue;

465

466 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)

467 continue;

468

469 releaseSucc(SU, &Succ);

470 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)

471 TopReadySUs.push_back(SuccSU);

472 }

473}

474

475void SIScheduleBlock::nodeScheduled(SUnit *SU) {

476

478 std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);

479 if (I == TopReadySUs.end()) {

480 dbgs() << "Data Structure Bug in SI Scheduler\n";

482 }

483 TopReadySUs.erase(I);

484

485 releaseSuccessors(SU, true);

486

487

488 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])

489 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);

490

491 if (DAG->IsLowLatencySU[SU->NodeNum]) {

492 for (SDep& Succ : SU->Succs) {

493 std::map<unsigned, unsigned>::iterator I =

495 if (I != NodeNum2Index.end())

496 HasLowLatencyNonWaitedParent[I->second] = 1;

497 }

498 }

500}

501

503

504 for (SUnit* SU : SUnits) {

505 releaseSuccessors(SU, false);

506 if (DAG->IsHighLatencySU[SU->NodeNum])

507 HighLatencyBlock = true;

508 }

509 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);

510}

511

512

514 unsigned PredID = Pred->getID();

515

516

518 if (PredID == P->getID())

519 return;

520 }

521 Preds.push_back(Pred);

522

526 return PredID == S.first->getID();

527 }) &&

528 "Loop in the Block Graph!");

529}

530

533 unsigned SuccID = Succ->getID();

534

535

536 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {

537 if (SuccID == S.first->getID()) {

539 Kind == SIScheduleBlockLinkKind::Data)

540 S.second = Kind;

541 return;

542 }

543 }

545 ++NumHighLatencySuccessors;

546 Succs.emplace_back(Succ, Kind);

547

550 "Loop in the Block Graph!");

551}

552

553#ifndef NDEBUG

555 dbgs() << "Block (" << ID << ")\n";

556 if (!full)

557 return;

558

559 dbgs() << "\nContains High Latency Instruction: "

560 << HighLatencyBlock << '\n';

561 dbgs() << "\nDepends On:\n";

563 P->printDebug(false);

564 }

565

566 dbgs() << "\nSuccessors:\n";

567 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {

568 if (S.second == SIScheduleBlockLinkKind::Data)

569 dbgs() << "(Data Dep) ";

570 S.first->printDebug(false);

571 }

572

573 if (Scheduled) {

574 dbgs() << "LiveInPressure "

575 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '

576 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';

577 dbgs() << "LiveOutPressure "

578 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '

579 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";

580 dbgs() << "LiveIns:\n";

581 for (Register Reg : LiveInRegs)

582 dbgs() << printReg(Reg, DAG->getTRI()) << ' ';

583

584 dbgs() << "\nLiveOuts:\n";

585 for (Register Reg : LiveOutRegs)

586 dbgs() << printReg(Reg, DAG->getTRI()) << ' ';

587 }

588

589 dbgs() << "\nInstructions:\n";

590 for (const SUnit* SU : SUnits)

591 DAG->dumpNode(*SU);

592

593 dbgs() << "///////////////////////\n";

594}

595#endif

596

597

598

601

604 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =

605 Blocks.find(BlockVariant);

606 if (B == Blocks.end()) {

608 createBlocksForVariant(BlockVariant);

609 topologicalSort();

610 scheduleInsideBlocks();

611 fillStats();

612 Res.Blocks = CurrentBlocks;

615 Blocks[BlockVariant] = Res;

616 return Res;

617 }

618 return B->second;

619}

620

622 if (SU->NodeNum >= DAG->SUnits.size())

623 return false;

624 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;

625}

626

627void SIScheduleBlockCreator::colorHighLatenciesAlone() {

628 unsigned DAGSize = DAG->SUnits.size();

629

630 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

633 CurrentColoring[SU->NodeNum] = NextReservedID++;

634 }

635 }

636}

637

638static bool

640 for (const auto &PredDep : SU.Preds) {

641 if (PredDep.getSUnit() == &FromSU &&

643 return true;

644 }

645 return false;

646}

647

648void SIScheduleBlockCreator::colorHighLatenciesGroups() {

649 unsigned DAGSize = DAG->SUnits.size();

650 unsigned NumHighLatencies = 0;

651 unsigned GroupSize;

652 int Color = NextReservedID;

653 unsigned Count = 0;

654 std::set FormingGroup;

655

656 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

657 SUnit *SU = &DAG->SUnits[i];

658 if (DAG->IsHighLatencySU[SU->NodeNum])

659 ++NumHighLatencies;

660 }

661

662 if (NumHighLatencies == 0)

663 return;

664

665 if (NumHighLatencies <= 6)

666 GroupSize = 2;

667 else if (NumHighLatencies <= 12)

668 GroupSize = 3;

669 else

670 GroupSize = 4;

671

672 for (unsigned SUNum : DAG->TopDownIndex2SU) {

673 const SUnit &SU = DAG->SUnits[SUNum];

674 if (DAG->IsHighLatencySU[SU.NodeNum]) {

675 unsigned CompatibleGroup = true;

676 int ProposedColor = Color;

677 std::vector AdditionalElements;

678

679

680

681

682

683

684

685

686

687

688

689 for (unsigned j : FormingGroup) {

690 bool HasSubGraph;

691 std::vector SubGraph;

692

693

694

695#ifndef NDEBUG

696 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],

697 HasSubGraph);

698 assert(!HasSubGraph);

699#endif

700 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,

701 HasSubGraph);

702 if (!HasSubGraph)

703 continue;

704 if (SubGraph.size() > 5) {

705

706 CompatibleGroup = false;

707 break;

708 }

709

710 for (unsigned k : SubGraph) {

711

712

713

714

715 if (DAG->IsHighLatencySU[k] || (CurrentColoring[k] != ProposedColor &&

716 CurrentColoring[k] != 0)) {

717 CompatibleGroup = false;

718 break;

719 }

720

721

723 CompatibleGroup = false;

724 break;

725 }

726 }

727 if (!CompatibleGroup)

728 break;

729

731 CompatibleGroup = false;

732 break;

733 }

734

735

736

737

738

740 }

741 if (CompatibleGroup) {

742 FormingGroup.insert(SU.NodeNum);

743 for (unsigned j : AdditionalElements)

744 CurrentColoring[j] = ProposedColor;

745 CurrentColoring[SU.NodeNum] = ProposedColor;

747 }

748

749

750

751 if (!CompatibleGroup) {

752 FormingGroup.clear();

753 Color = ++NextReservedID;

754 ProposedColor = Color;

755 FormingGroup.insert(SU.NodeNum);

756 CurrentColoring[SU.NodeNum] = ProposedColor;

758 } else if (Count == GroupSize) {

759 FormingGroup.clear();

760 Color = ++NextReservedID;

761 ProposedColor = Color;

763 }

764 }

765 }

766}

767

768void SIScheduleBlockCreator::colorComputeReservedDependencies() {

769 unsigned DAGSize = DAG->SUnits.size();

770 std::map<std::set, unsigned> ColorCombinations;

771

772 CurrentTopDownReservedDependencyColoring.clear();

773 CurrentBottomUpReservedDependencyColoring.clear();

774

775 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);

776 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);

777

778

779

780

781 for (unsigned SUNum : DAG->TopDownIndex2SU) {

782 SUnit *SU = &DAG->SUnits[SUNum];

783 std::set SUColors;

784

785

786 if (CurrentColoring[SU->NodeNum]) {

787 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =

788 CurrentColoring[SU->NodeNum];

789 continue;

790 }

791

792 for (SDep& PredDep : SU->Preds) {

793 SUnit *Pred = PredDep.getSUnit();

794 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)

795 continue;

796 if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)

797 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);

798 }

799

800 if (SUColors.empty())

801 continue;

802

803 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)

804 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =

805 *SUColors.begin();

806 else {

808 ColorCombinations.try_emplace(SUColors, NextNonReservedID);

809 if (Inserted)

810 ++NextNonReservedID;

811 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;

812 }

813 }

814

815 ColorCombinations.clear();

816

817

818

819 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

820 SUnit *SU = &DAG->SUnits[SUNum];

821 std::set SUColors;

822

823

824 if (CurrentColoring[SU->NodeNum]) {

825 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =

826 CurrentColoring[SU->NodeNum];

827 continue;

828 }

829

830 for (SDep& SuccDep : SU->Succs) {

831 SUnit *Succ = SuccDep.getSUnit();

832 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

833 continue;

834 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)

835 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);

836 }

837

838 if (SUColors.empty())

839 continue;

840

841 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)

842 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =

843 *SUColors.begin();

844 else {

845 std::map<std::set, unsigned>::iterator Pos =

846 ColorCombinations.find(SUColors);

847 if (Pos != ColorCombinations.end()) {

848 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;

849 } else {

850 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =

851 NextNonReservedID;

852 ColorCombinations[SUColors] = NextNonReservedID++;

853 }

854 }

855 }

856}

857

858void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {

859 std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;

860

861

862

863

864 for (const SUnit &SU : DAG->SUnits) {

865 std::pair<unsigned, unsigned> SUColors;

866

867

868 if (CurrentColoring[SU.NodeNum])

869 continue;

870

871 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];

872 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];

873

875 ColorCombinations.try_emplace(SUColors, NextNonReservedID);

876 CurrentColoring[SU.NodeNum] = Pos->second;

877 if (Inserted)

878 NextNonReservedID++;

879 }

880}

881

882void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {

883 unsigned DAGSize = DAG->SUnits.size();

884 std::vector PendingColoring = CurrentColoring;

885

886 assert(DAGSize >= 1 &&

887 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&

888 CurrentTopDownReservedDependencyColoring.size() == DAGSize);

889

890

891 if (*llvm::max_element(CurrentBottomUpReservedDependencyColoring) == 0 &&

892 *llvm::max_element(CurrentTopDownReservedDependencyColoring) == 0)

893 return;

894

895 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

896 SUnit *SU = &DAG->SUnits[SUNum];

897 std::set SUColors;

898 std::set SUColorsPending;

899

900 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)

901 continue;

902

903 if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||

904 CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)

905 continue;

906

907 for (SDep& SuccDep : SU->Succs) {

908 SUnit *Succ = SuccDep.getSUnit();

909 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

910 continue;

911 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||

912 CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)

913 SUColors.insert(CurrentColoring[Succ->NodeNum]);

914 SUColorsPending.insert(PendingColoring[Succ->NodeNum]);

915 }

916

917

918

919 if (SUColors.size() == 1 && SUColorsPending.size() == 1)

920 PendingColoring[SU->NodeNum] = *SUColors.begin();

921 else

922

923 PendingColoring[SU->NodeNum] = NextNonReservedID++;

924 }

925 CurrentColoring = PendingColoring;

926}

927

928

929void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {

930 unsigned DAGSize = DAG->SUnits.size();

931 unsigned PreviousColor;

932 std::set SeenColors;

933

934 if (DAGSize <= 1)

935 return;

936

937 PreviousColor = CurrentColoring[0];

938

939 for (unsigned i = 1, e = DAGSize; i != e; ++i) {

940 SUnit *SU = &DAG->SUnits[i];

941 unsigned CurrentColor = CurrentColoring[i];

942 unsigned PreviousColorSave = PreviousColor;

944

945 if (CurrentColor != PreviousColor)

946 SeenColors.insert(PreviousColor);

947 PreviousColor = CurrentColor;

948

949 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)

950 continue;

951

952 if (SeenColors.find(CurrentColor) == SeenColors.end())

953 continue;

954

955 if (PreviousColorSave != CurrentColor)

956 CurrentColoring[i] = NextNonReservedID++;

957 else

958 CurrentColoring[i] = CurrentColoring[i-1];

959 }

960}

961

962void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {

963 unsigned DAGSize = DAG->SUnits.size();

964

965 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

966 SUnit *SU = &DAG->SUnits[SUNum];

967 std::set SUColors;

968

969 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)

970 continue;

971

972

973

974 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])

975 continue;

976

977 for (SDep& SuccDep : SU->Succs) {

978 SUnit *Succ = SuccDep.getSUnit();

979 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

980 continue;

981 SUColors.insert(CurrentColoring[Succ->NodeNum]);

982 }

983 if (SUColors.size() == 1)

984 CurrentColoring[SU->NodeNum] = *SUColors.begin();

985 }

986}

987

988void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {

989 unsigned DAGSize = DAG->SUnits.size();

990

991 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

992 SUnit *SU = &DAG->SUnits[SUNum];

993 std::set SUColors;

994

995 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)

996 continue;

997

998 for (SDep& SuccDep : SU->Succs) {

999 SUnit *Succ = SuccDep.getSUnit();

1000 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

1001 continue;

1002 SUColors.insert(CurrentColoring[Succ->NodeNum]);

1003 }

1004 if (SUColors.size() == 1)

1005 CurrentColoring[SU->NodeNum] = *SUColors.begin();

1006 }

1007}

1008

1009void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {

1010 unsigned DAGSize = DAG->SUnits.size();

1011

1012 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

1013 SUnit *SU = &DAG->SUnits[SUNum];

1014 std::set SUColors;

1015

1016 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)

1017 continue;

1018

1019 for (SDep& SuccDep : SU->Succs) {

1020 SUnit *Succ = SuccDep.getSUnit();

1021 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

1022 continue;

1023 SUColors.insert(CurrentColoring[Succ->NodeNum]);

1024 }

1025 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)

1026 CurrentColoring[SU->NodeNum] = *SUColors.begin();

1027 }

1028}

1029

1030void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {

1031 unsigned DAGSize = DAG->SUnits.size();

1032 std::map<unsigned, unsigned> ColorCount;

1033

1034 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

1035 SUnit *SU = &DAG->SUnits[SUNum];

1036 unsigned color = CurrentColoring[SU->NodeNum];

1037 ++ColorCount[color];

1038 }

1039

1040 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

1041 SUnit *SU = &DAG->SUnits[SUNum];

1042 unsigned color = CurrentColoring[SU->NodeNum];

1043 std::set SUColors;

1044

1045 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)

1046 continue;

1047

1048 if (ColorCount[color] > 1)

1049 continue;

1050

1051 for (SDep& SuccDep : SU->Succs) {

1052 SUnit *Succ = SuccDep.getSUnit();

1053 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

1054 continue;

1055 SUColors.insert(CurrentColoring[Succ->NodeNum]);

1056 }

1057 if (SUColors.size() == 1 && *SUColors.begin() != color) {

1058 --ColorCount[color];

1059 CurrentColoring[SU->NodeNum] = *SUColors.begin();

1060 ++ColorCount[*SUColors.begin()];

1061 }

1062 }

1063}

1064

1065void SIScheduleBlockCreator::cutHugeBlocks() {

1066

1067}

1068

1069void SIScheduleBlockCreator::regroupNoUserInstructions() {

1070 unsigned DAGSize = DAG->SUnits.size();

1071 int GroupID = NextNonReservedID++;

1072

1073 for (unsigned SUNum : DAG->BottomUpIndex2SU) {

1074 SUnit *SU = &DAG->SUnits[SUNum];

1075 bool hasSuccessor = false;

1076

1077 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)

1078 continue;

1079

1080 for (SDep& SuccDep : SU->Succs) {

1081 SUnit *Succ = SuccDep.getSUnit();

1082 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

1083 continue;

1084 hasSuccessor = true;

1085 }

1086 if (!hasSuccessor)

1087 CurrentColoring[SU->NodeNum] = GroupID;

1088 }

1089}

1090

1091void SIScheduleBlockCreator::colorExports() {

1092 unsigned ExportColor = NextNonReservedID++;

1093 SmallVector<unsigned, 8> ExpGroup;

1094

1095

1096

1097

1098

1099

1100

1101

1102

1103

1104

1105 for (unsigned SUNum : DAG->TopDownIndex2SU) {

1106 const SUnit &SU = DAG->SUnits[SUNum];

1108

1109

1110 for (const SDep &SuccDep : SU.Succs) {

1111 const SUnit *SuccSU = SuccDep.getSUnit();

1112 if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) {

1113

1114 continue;

1115 }

1117 "SUnit unexpectedly not representing an instruction!");

1118

1120

1121

1122

1123

1124

1125 return;

1126 }

1127 }

1129 }

1130 }

1131

1132

1133 for (unsigned j : ExpGroup)

1134 CurrentColoring[j] = ExportColor;

1135}

1136

1138 unsigned DAGSize = DAG->SUnits.size();

1139 std::map<unsigned,unsigned> RealID;

1140

1141 CurrentBlocks.clear();

1142 CurrentColoring.clear();

1143 CurrentColoring.resize(DAGSize, 0);

1144 Node2CurrentBlock.clear();

1145

1146

1147 DAG->restoreSULinksLeft();

1148

1149 NextReservedID = 1;

1150 NextNonReservedID = DAGSize + 1;

1151

1153

1155 colorHighLatenciesGroups();

1156 else

1157 colorHighLatenciesAlone();

1158 colorComputeReservedDependencies();

1159 colorAccordingToReservedDependencies();

1160 colorEndsAccordingToDependencies();

1162 colorForceConsecutiveOrderInGroup();

1163 regroupNoUserInstructions();

1164 colorMergeConstantLoadsNextGroup();

1165 colorMergeIfPossibleNextGroupOnlyForReserved();

1166 colorExports();

1167

1168

1169 Node2CurrentBlock.resize(DAGSize, -1);

1170 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1171 SUnit *SU = &DAG->SUnits[i];

1172 unsigned Color = CurrentColoring[SU->NodeNum];

1173 auto [It, Inserted] = RealID.try_emplace(Color);

1174 if (Inserted) {

1175 int ID = CurrentBlocks.size();

1176 BlockPtrs.push_back(std::make_unique(DAG, this, ID));

1177 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());

1178 It->second = ID;

1179 }

1180 CurrentBlocks[It->second]->addUnit(SU);

1181 Node2CurrentBlock[SU->NodeNum] = It->second;

1182 }

1183

1184

1185 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1186 SUnit *SU = &DAG->SUnits[i];

1187 int SUID = Node2CurrentBlock[i];

1188 for (SDep& SuccDep : SU->Succs) {

1189 SUnit *Succ = SuccDep.getSUnit();

1190 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

1191 continue;

1192 if (Node2CurrentBlock[Succ->NodeNum] != SUID)

1193 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],

1195 }

1196 for (SDep& PredDep : SU->Preds) {

1197 SUnit *Pred = PredDep.getSUnit();

1198 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)

1199 continue;

1200 if (Node2CurrentBlock[Pred->NodeNum] != SUID)

1201 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);

1202 }

1203 }

1204

1205

1206 for (SIScheduleBlock *Block : CurrentBlocks)

1207 Block->finalizeUnits();

1209 dbgs() << "Blocks created:\n\n";

1210 for (SIScheduleBlock *Block : CurrentBlocks)

1211 Block->printDebug(true);

1212 });

1213}

1214

1215

1216

1217

1221 for (; I != End; ++I) {

1222 if (I->isDebugInstr())

1223 break;

1224 }

1225 return I;

1226}

1227

1228void SIScheduleBlockCreator::topologicalSort() {

1229 unsigned DAGSize = CurrentBlocks.size();

1230 std::vector WorkList;

1231

1233

1234 WorkList.reserve(DAGSize);

1235 TopDownIndex2Block.resize(DAGSize);

1236 TopDownBlock2Index.resize(DAGSize);

1237 BottomUpIndex2Block.resize(DAGSize);

1238

1239 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1240 SIScheduleBlock *Block = CurrentBlocks[i];

1241 unsigned Degree = Block->getSuccs().size();

1242 TopDownBlock2Index[i] = Degree;

1243 if (Degree == 0) {

1244 WorkList.push_back(i);

1245 }

1246 }

1247

1248 int Id = DAGSize;

1249 while (!WorkList.empty()) {

1250 int i = WorkList.back();

1251 SIScheduleBlock *Block = CurrentBlocks[i];

1252 WorkList.pop_back();

1253 TopDownBlock2Index[i] = --Id;

1254 TopDownIndex2Block[Id] = i;

1255 for (SIScheduleBlock* Pred : Block->getPreds()) {

1256 if (!--TopDownBlock2Index[Pred->getID()])

1257 WorkList.push_back(Pred->getID());

1258 }

1259 }

1260

1261#ifndef NDEBUG

1262

1263 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1264 SIScheduleBlock *Block = CurrentBlocks[i];

1265 for (SIScheduleBlock* Pred : Block->getPreds()) {

1266 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&

1267 "Wrong Top Down topological sorting");

1268 }

1269 }

1270#endif

1271

1272 BottomUpIndex2Block = std::vector(TopDownIndex2Block.rbegin(),

1273 TopDownIndex2Block.rend());

1274}

1275

1276void SIScheduleBlockCreator::scheduleInsideBlocks() {

1277 unsigned DAGSize = CurrentBlocks.size();

1278

1280

1281

1282

1283 LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");

1284 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1285 SIScheduleBlock *Block = CurrentBlocks[i];

1286 Block->fastSchedule();

1287 }

1288

1289

1290

1291

1292

1294 std::vectorMachineBasicBlock::iterator PosOld;

1295 std::vectorMachineBasicBlock::iterator PosNew;

1296 PosOld.reserve(DAG->SUnits.size());

1297 PosNew.reserve(DAG->SUnits.size());

1298

1299 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1300 int BlockIndice = TopDownIndex2Block[i];

1301 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];

1302 std::vector<SUnit*> SUs = Block->getScheduledUnits();

1303

1304 for (SUnit* SU : SUs) {

1307 PosOld.push_back(Pos);

1308 if (&*CurrentTopFastSched == MI) {

1309 PosNew.push_back(Pos);

1310 CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,

1311 DAG->getCurrentBottom());

1312 } else {

1313

1314 DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);

1315

1316

1317

1318

1319

1320

1321 DAG->getLIS()->handleMove(*MI, true);

1322 PosNew.push_back(CurrentTopFastSched);

1323 }

1324 }

1325 }

1326

1327

1328

1329

1330

1331 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1332 SIScheduleBlock *Block = CurrentBlocks[i];

1333 std::vector<SUnit*> SUs = Block->getScheduledUnits();

1334 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());

1335 }

1336

1338

1339 for (unsigned i = PosOld.size(), e = 0; i != e; --i) {

1342 if (PNew != POld) {

1343

1344 DAG->getBB()->splice(POld, DAG->getBB(), PNew);

1345

1346

1347 DAG->getLIS()->handleMove(*POld, true);

1348 }

1349 }

1350

1352 for (SIScheduleBlock *Block : CurrentBlocks)

1353 Block->printDebug(true);

1354 });

1355}

1356

1357void SIScheduleBlockCreator::fillStats() {

1358 unsigned DAGSize = CurrentBlocks.size();

1359

1360 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1361 int BlockIndice = TopDownIndex2Block[i];

1362 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];

1363 if (Block->getPreds().empty())

1364 Block->Depth = 0;

1365 else {

1366 unsigned Depth = 0;

1367 for (SIScheduleBlock *Pred : Block->getPreds()) {

1369 Depth = Pred->Depth + Pred->getCost();

1370 }

1372 }

1373 }

1374

1375 for (unsigned i = 0, e = DAGSize; i != e; ++i) {

1376 int BlockIndice = BottomUpIndex2Block[i];

1377 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];

1378 if (Block->getSuccs().empty())

1379 Block->Height = 0;

1380 else {

1381 unsigned Height = 0;

1382 for (const auto &Succ : Block->getSuccs())

1383 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());

1384 Block->Height = Height;

1385 }

1386 }

1387}

1388

1389

1390

1394 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),

1395 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),

1396 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {

1397

1398

1399

1400

1401

1402

1403

1404

1405

1406

1407

1408 LiveOutRegsNumUsages.resize(Blocks.size());

1411 bool Found = false;

1412 int topoInd = -1;

1414 std::set PredOutRegs = Pred->getOutRegs();

1415 std::set::iterator RegPos = PredOutRegs.find(Reg);

1416

1417 if (RegPos != PredOutRegs.end()) {

1418 Found = true;

1421 }

1422 }

1423 }

1424

1425 if (!Found)

1426 continue;

1427

1429 ++LiveOutRegsNumUsages[PredID][Reg];

1430 }

1431 }

1432

1433 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);

1434 BlockNumPredsLeft.resize(Blocks.size());

1435 BlockNumSuccsLeft.resize(Blocks.size());

1436

1437 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {

1438 SIScheduleBlock *Block = Blocks[i];

1439 BlockNumPredsLeft[i] = Block->getPreds().size();

1440 BlockNumSuccsLeft[i] = Block->getSuccs().size();

1441 }

1442

1444 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {

1445 SIScheduleBlock *Block = Blocks[i];

1446 assert(Block->getID() == i);

1447 }

1448#endif

1449

1450 std::set InRegs = DAG->getInRegs();

1451 addLiveRegs(InRegs);

1452

1453

1454

1455

1456 for (VirtRegOrUnit VRegOrUnit : DAG->getOutRegs()) {

1457 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {

1458

1459 int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];

1460 SIScheduleBlock *Block = Blocks[ID];

1461 const std::set &OutRegs = Block->getOutRegs();

1462

1463 if (!VRegOrUnit.isVirtualReg() ||

1464 OutRegs.find(VRegOrUnit.asVirtualReg()) == OutRegs.end())

1465 continue;

1466

1467 ++LiveOutRegsNumUsages[ID][VRegOrUnit.asVirtualReg()];

1468 break;

1469 }

1470 }

1471

1472

1473

1476 bool Found = false;

1478 std::set PredOutRegs = Pred->getOutRegs();

1479 std::set::iterator RegPos = PredOutRegs.find(Reg);

1480

1481 if (RegPos != PredOutRegs.end()) {

1482 Found = true;

1483 break;

1484 }

1485 }

1486

1487 if (!Found)

1488 ++LiveRegsConsumers[Reg];

1489 }

1490 }

1491

1492 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {

1493 SIScheduleBlock *Block = Blocks[i];

1494 if (BlockNumPredsLeft[i] == 0) {

1495 ReadyBlocks.push_back(Block);

1496 }

1497 }

1498

1500 BlocksScheduled.push_back(Block);

1501 blockScheduled(Block);

1502 }

1503

1505 : BlocksScheduled) {

1506 dbgs() << ' ' << Block->getID();

1507 } dbgs() << '\n';);

1508}

1509

1510bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,

1511 SIBlockSchedCandidate &TryCand) {

1512 if (!Cand.isValid()) {

1514 return true;

1515 }

1516

1517

1519 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))

1520 return true;

1521

1523 TryCand, Cand, Latency))

1524 return true;

1525 if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,

1526 TryCand, Cand, Depth))

1527 return true;

1529 Cand.NumHighLatencySuccessors,

1531 return true;

1532 return false;

1533}

1534

1535bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,

1536 SIBlockSchedCandidate &TryCand) {

1537 if (!Cand.isValid()) {

1539 return true;

1540 }

1541

1542 if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,

1544 return true;

1546 Cand.NumSuccessors > 0,

1548 return true;

1550 return true;

1551 if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,

1553 return true;

1554 return false;

1555}

1556

1558 SIBlockSchedCandidate Cand;

1559 std::vector<SIScheduleBlock*>::iterator Best;

1560 SIScheduleBlock *Block;

1561 if (ReadyBlocks.empty())

1562 return nullptr;

1563

1564 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),

1565 VregCurrentUsage, SregCurrentUsage);

1566 if (VregCurrentUsage > maxVregUsage)

1567 maxVregUsage = VregCurrentUsage;

1568 if (SregCurrentUsage > maxSregUsage)

1569 maxSregUsage = SregCurrentUsage;

1571 dbgs() << "Picking New Blocks\n";

1572 dbgs() << "Available: ";

1573 for (SIScheduleBlock *Block : ReadyBlocks)

1574 dbgs() << Block->getID() << ' ';

1575 dbgs() << "\nCurrent Live:\n";

1578 dbgs() << '\n';

1579 dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';

1580 dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';

1581 });

1582

1583 Cand.Block = nullptr;

1584 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),

1585 E = ReadyBlocks.end(); I != E; ++I) {

1586 SIBlockSchedCandidate TryCand;

1587 TryCand.Block = *I;

1588 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();

1589 TryCand.VGPRUsageDiff =

1590 checkRegUsageImpact(TryCand.Block->getInRegs(),

1591 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];

1592 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();

1593 TryCand.NumHighLatencySuccessors =

1594 TryCand.Block->getNumHighLatencySuccessors();

1595 TryCand.LastPosHighLatParentScheduled =

1596 (unsigned int) std::max (0,

1597 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -

1598 LastPosWaitedHighLatency);

1599 TryCand.Height = TryCand.Block->Height;

1600

1601 if (VregCurrentUsage > 120 ||

1603 if (!tryCandidateRegUsage(Cand, TryCand) &&

1605 tryCandidateLatency(Cand, TryCand);

1606 } else {

1607 if (!tryCandidateLatency(Cand, TryCand))

1608 tryCandidateRegUsage(Cand, TryCand);

1609 }

1610 if (TryCand.Reason != NoCand) {

1611 Cand.setBest(TryCand);

1612 Best = I;

1613 LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '

1615 }

1616 }

1617

1618 LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';

1619 dbgs() << "Is a block with high latency instruction: "

1620 << (Cand.IsHighLatency ? "yes\n" : "no\n");

1621 dbgs() << "Position of last high latency dependency: "

1622 << Cand.LastPosHighLatParentScheduled << '\n';

1623 dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';

1624 dbgs() << '\n';);

1625

1626 Block = Cand.Block;

1627 ReadyBlocks.erase(Best);

1629}

1630

1631

1632

1633void SIScheduleBlockScheduler::addLiveRegs(std::set &Regs) {

1634 for (VirtRegOrUnit VRegOrUnit : Regs) {

1635

1637 continue;

1638

1639 (void)LiveRegs.insert(VRegOrUnit.asVirtualReg());

1640 }

1641}

1642

1644 std::set &Regs) {

1646

1647 std::set::iterator Pos = LiveRegs.find(Reg);

1648 assert (Pos != LiveRegs.end() &&

1649 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&

1650 LiveRegsConsumers[Reg] >= 1);

1651 --LiveRegsConsumers[Reg];

1652 if (LiveRegsConsumers[Reg] == 0)

1653 LiveRegs.erase(Pos);

1654 }

1655}

1656

1657void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {

1659 if (--BlockNumPredsLeft[Block.first->getID()] == 0)

1660 ReadyBlocks.push_back(Block.first);

1661

1663 Block.second == SIScheduleBlockLinkKind::Data)

1664 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;

1665 }

1666}

1667

1669 decreaseLiveRegs(Block, Block->getInRegs());

1670 LiveRegs.insert(Block->getOutRegs().begin(), Block->getOutRegs().end());

1671 releaseBlockSuccs(Block);

1672 for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {

1673

1674 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||

1675 LiveRegsConsumers[RegP.first] == 0);

1676 LiveRegsConsumers[RegP.first] += RegP.second;

1677 }

1678 if (LastPosHighLatencyParentScheduled[Block->getID()] >

1679 (unsigned)LastPosWaitedHighLatency)

1680 LastPosWaitedHighLatency =

1681 LastPosHighLatencyParentScheduled[Block->getID()];

1682 ++NumBlockScheduled;

1683}

1684

1685std::vector

1686SIScheduleBlockScheduler::checkRegUsageImpact(std::set &InRegs,

1687 std::set &OutRegs) {

1688 std::vector DiffSetPressure;

1689 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);

1690

1692

1694 continue;

1695 if (LiveRegsConsumers[Reg] > 1)

1696 continue;

1697 PSetIterator PSetI = DAG->getMRI()->getPressureSets(VirtRegOrUnit(Reg));

1698 for (; PSetI.isValid(); ++PSetI) {

1699 DiffSetPressure[*PSetI] -= PSetI.getWeight();

1700 }

1701 }

1702

1704

1706 continue;

1707 PSetIterator PSetI = DAG->getMRI()->getPressureSets(VirtRegOrUnit(Reg));

1708 for (; PSetI.isValid(); ++PSetI) {

1709 DiffSetPressure[*PSetI] += PSetI.getWeight();

1710 }

1711 }

1712

1713 return DiffSetPressure;

1714}

1715

1716

1717

1720 SISchedulerBlockSchedulerVariant ScheduleVariant) {

1721 SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);

1723 std::vector<SIScheduleBlock*> ScheduledBlocks;

1725

1726 ScheduledBlocks = Scheduler.getBlocks();

1727

1729 std::vector<SUnit*> SUs = Block->getScheduledUnits();

1730

1733 }

1734

1737 return Res;

1738}

1739

1740

1741

1747

1749

1750

1751

1752

1753void SIScheduleDAGMI::topologicalSort() {

1755

1758}

1759

1760

1761

1762

1763

1764

1765

1766void SIScheduleDAGMI::moveLowLatencies() {

1767 unsigned DAGSize = SUnits.size();

1768 int LastLowLatencyUser = -1;

1769 int LastLowLatencyPos = -1;

1770

1771 for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {

1772 SUnit *SU = &SUnits[ScheduledSUnits[i]];

1773 bool IsLowLatencyUser = false;

1774 unsigned MinPos = 0;

1775

1776 for (SDep& PredDep : SU->Preds) {

1779 IsLowLatencyUser = true;

1780 }

1781 if (Pred->NodeNum >= DAGSize)

1782 continue;

1783 unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];

1784 if (PredPos >= MinPos)

1785 MinPos = PredPos + 1;

1786 }

1787

1788 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {

1789 unsigned BestPos = LastLowLatencyUser + 1;

1790 if ((int)BestPos <= LastLowLatencyPos)

1791 BestPos = LastLowLatencyPos + 1;

1792 if (BestPos < MinPos)

1793 BestPos = MinPos;

1794 if (BestPos < i) {

1795 for (unsigned u = i; u > BestPos; --u) {

1796 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];

1797 ScheduledSUnits[u] = ScheduledSUnits[u-1];

1798 }

1799 ScheduledSUnits[BestPos] = SU->NodeNum;

1800 ScheduledSUnitsInv[SU->NodeNum] = BestPos;

1801 }

1802 LastLowLatencyPos = BestPos;

1803 if (IsLowLatencyUser)

1804 LastLowLatencyUser = BestPos;

1805 } else if (IsLowLatencyUser) {

1806 LastLowLatencyUser = i;

1807

1808

1810 bool CopyForLowLat = false;

1811 for (SDep& SuccDep : SU->Succs) {

1812 SUnit *Succ = SuccDep.getSUnit();

1813 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)

1814 continue;

1815 if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {

1816 CopyForLowLat = true;

1817 }

1818 }

1819 if (!CopyForLowLat)

1820 continue;

1821 if (MinPos < i) {

1822 for (unsigned u = i; u > MinPos; --u) {

1823 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];

1824 ScheduledSUnits[u] = ScheduledSUnits[u-1];

1825 }

1826 ScheduledSUnits[MinPos] = SU->NodeNum;

1827 ScheduledSUnitsInv[SU->NodeNum] = MinPos;

1828 }

1829 }

1830 }

1831}

1832

1834 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {

1835 SUnits[i].isScheduled = false;

1836 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;

1837 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;

1838 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;

1839 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;

1840 }

1841}

1842

1843

1844template<typename _Iterator> void

1846 unsigned &VgprUsage, unsigned &SgprUsage) {

1847 VgprUsage = 0;

1848 SgprUsage = 0;

1849 for (_Iterator RegI = First; RegI != End; ++RegI) {

1851

1852 if (!Reg.isVirtual())

1853 continue;

1855 for (; PSetI.isValid(); ++PSetI) {

1856 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)

1858 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)

1860 }

1861 }

1862}

1863

1865{

1869

1872

1878

1879 topologicalSort();

1881

1882

1883

1884

1887

1888

1889

1890 SUnitsLinksBackup = SUnits;

1894

1898

1899 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {

1902 int64_t OffLatReg;

1903 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {

1905 bool OffsetIsScalable;

1906 if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,

1907 OffsetIsScalable, TRI))

1909 } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))

1911 }

1912

1916

1917

1918

1922 Variants[] = {

1924

1926

1927

1929

1930

1931 };

1932 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {

1933 Temp = Scheduler.scheduleVariant(v.first, v.second);

1935 Best = Temp;

1936 }

1937 }

1938

1939

1943 Variants[] = {

1944

1946

1949

1952 };

1953 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {

1954 Temp = Scheduler.scheduleVariant(v.first, v.second);

1956 Best = Temp;

1957 }

1958 }

1959

1960 ScheduledSUnits = Best.SUs;

1961 ScheduledSUnitsInv.resize(SUnits.size());

1962

1963 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {

1964 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;

1965 }

1966

1967 moveLowLatencies();

1968

1969

1970

1973

1974 for (unsigned I : ScheduledSUnits) {

1976

1978

1981 }

1982

1984

1986

1988 dbgs() << "*** Final schedule for "

1991 dbgs() << '\n';

1992 });

1993}

unsigned const MachineRegisterInfo * MRI

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

Provides AMDGPU specific target descriptions.

static const Function * getParent(const Value *V)

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Machine Instruction Scheduler

static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)

If this iterator is a debug value, increment until reaching the End or a non-debug instruction.

Promote Memory to Register

Interface definition for SIInstrInfo.

static const char * getReasonStr(SIScheduleCandReason Reason)

Definition SIMachineScheduler.cpp:124

static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)

Definition SIMachineScheduler.cpp:639

static bool isDefBetween(Register Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)

Definition SIMachineScheduler.cpp:287

SI Machine Scheduler interface.

GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.

SlotIndex getInstructionIndex(const MachineInstr &Instr) const

Returns the base index of the given instruction.

MachineInstrBundleIterator< const MachineInstr > const_iterator

MachineInstrBundleIterator< MachineInstr > iterator

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

MachineOperand class - Representation of each machine instruction operand.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Iterate over the pressure sets affected by the given physical or virtual register.

unsigned getWeight() const

Wrapper class representing virtual and physical registers.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

@ Data

Regular data dependence (aka true-dependence).

bool isWeak() const

Tests if this a weak dependence.

bool isCtrl() const

Shorthand for getKind() != SDep::Data.

static bool isEXP(const MachineInstr &MI)

bool isLowLatencyInstruction(const MachineInstr &MI) const

bool isSUInBlock(SUnit *SU, unsigned ID)

Definition SIMachineScheduler.cpp:621

SIScheduleBlockCreator(SIScheduleDAGMI *DAG)

Definition SIMachineScheduler.cpp:599

SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)

Definition SIMachineScheduler.cpp:603

SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)

Definition SIMachineScheduler.cpp:1391

void fastSchedule()

Definition SIMachineScheduler.cpp:267

SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, unsigned ID)

ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const

void addPred(SIScheduleBlock *Pred)

Definition SIMachineScheduler.cpp:513

void printDebug(bool Full)

Definition SIMachineScheduler.cpp:554

void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)

Definition SIMachineScheduler.cpp:531

void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)

Definition SIMachineScheduler.cpp:374

void addUnit(SUnit *SU)

Functions for Block construction.

Definition SIMachineScheduler.cpp:176

bool isHighLatencyBlock()

void finalizeUnits()

Definition SIMachineScheduler.cpp:502

void restoreSULinksLeft()

Definition SIMachineScheduler.cpp:1833

std::vector< int > BottomUpIndex2SU

std::vector< unsigned > IsHighLatencySU

std::vector< unsigned > LowLatencyOffset

std::vector< int > TopDownIndex2SU

void schedule() override

Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.

Definition SIMachineScheduler.cpp:1864

void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)

Definition SIMachineScheduler.cpp:1845

SIScheduleDAGMI(MachineSchedContext *C)

Definition SIMachineScheduler.cpp:1742

std::vector< unsigned > IsLowLatencySU

~SIScheduleDAGMI() override

struct SIScheduleBlockResult scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, SISchedulerBlockSchedulerVariant ScheduleVariant)

Definition SIMachineScheduler.cpp:1719

SIScheduler(SIScheduleDAGMI *DAG)

Scheduling unit. This is a node in the scheduling DAG.

bool isInstr() const

Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.

unsigned NodeNum

Entry # of node in the node vector.

bool isScheduled

True once scheduled.

SmallVector< SDep, 4 > Succs

All sunit successors.

SmallVector< SDep, 4 > Preds

All sunit predecessors.

LLVM_ABI bool addPred(const SDep &D, bool Required=true)

Adds the specified edge as a pred of the current node if not already.

MachineInstr * getInstr() const

Returns the representative MachineInstr for this SUnit.

ScheduleDAGTopologicalSort Topo

Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.

MachineBasicBlock::iterator begin() const

Returns an iterator to the top of the current scheduling region.

MachineBasicBlock::iterator RegionBegin

The beginning of the range to be scheduled.

void scheduleMI(SUnit *SU, bool IsTopNode)

Move an instruction and update register pressure.

void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)

Release ExitSU predecessors and setup scheduler queues.

void buildDAGWithRegPressure()

Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.

ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)

void dump() const override

RegPressureTracker TopRPTracker

void dumpSchedule() const

dump the scheduled Sequence.

std::unique_ptr< MachineSchedStrategy > SchedImpl

void postProcessDAG()

Apply each ScheduleDAGMutation step in order.

void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)

MachineBasicBlock::iterator CurrentBottom

The bottom of the unscheduled zone.

void viewGraph() override

Out-of-line implementation with no arguments is handy for gdb.

void placeDebugValues()

Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.

MachineBasicBlock::iterator CurrentTop

The top of the unscheduled zone.

LLVM_ABI void InitDAGTopologicalSorting()

Creates the initial topological ordering from the DAG to be scheduled.

reverse_iterator rbegin()

MachineRegisterInfo & MRI

Virtual/real register map.

const TargetInstrInfo * TII

Target instruction information.

std::vector< SUnit > SUnits

The scheduling units.

const TargetRegisterInfo * TRI

Target processor register info.

SlotIndex - An opaque wrapper around machine indexes.

SlotIndex getRegSlot(bool EC=false) const

Returns the register use/def slot in the current instruction for a normal or early-clobber def.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Wrapper class representing a virtual register or register unit.

constexpr bool isVirtualReg() const

constexpr Register asVirtualReg() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)

Definition SIMachineScheduler.cpp:156

static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)

Definition SIMachineScheduler.cpp:139

This is an optimization pass for GlobalISel generic memory operations.

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

SISchedulerBlockSchedulerVariant

cl::opt< bool > ViewMISchedDAGs

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

FunctionAddr VTableAddr Count

SISchedulerBlockCreatorVariant

@ LatenciesAlonePlusConsecutive

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

FunctionAddr VTableAddr uintptr_t uintptr_t Data

auto max_element(R &&Range)

Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...

LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.

cl::opt< bool > PrintDAGs

Implement std::hash so that hash_code can be used in STL containers.

MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...

std::vector< unsigned > MaxSetPressure

Map of max reg pressure indexed by pressure set ID, not class ID.

std::vector< unsigned > SUs

std::vector< int > TopDownIndex2Block

std::vector< SIScheduleBlock * > Blocks

std::vector< int > TopDownBlock2Index

SIScheduleCandReason Reason

void setRepeat(SIScheduleCandReason R)