LLVM: lib/CodeGen/MachinePipeliner.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

72#include "llvm/Config/llvm-config.h"

83#include

84#include

85#include

86#include

87#include

88#include

89#include

90#include

91#include

92#include

93#include

94#include

95#include

96#include

97

98using namespace llvm;

99

100#define DEBUG_TYPE "pipeliner"

101

102STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");

103STATISTIC(NumPipelined, "Number of loops software pipelined");

104STATISTIC(NumNodeOrderIssues, "Number of node order issues found");

105STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");

106STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");

107STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");

108STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");

109STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");

110STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");

111STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");

112STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");

113STATISTIC(NumFailTooManyStores, "Pipeliner abort due to too many stores");

114

115

117 cl::desc("Enable Software Pipelining"));

118

119

123

124

126 cl::desc("Size limit for the MII."),

128

129

130

132 cl::desc("Force pipeliner to use specified II."),

134

135

138 cl::desc("Maximum stages allowed in the generated scheduled."),

140

141

142

145 cl::desc("Prune dependences between unrelated Phi nodes."),

147

148

149

152 cl::desc("Prune loop carried order dependences."),

154

155#ifndef NDEBUG

157#endif

158

162

167

170 cl::desc("Instead of emitting the pipelined code, annotate instructions "

171 "with the generated schedule for feeding into the "

172 "-modulo-schedule-test pass"));

173

177 "Use the experimental peeling code generator for software pipelining"));

178

180 cl::desc("Range to search for II"),

182

185 cl::desc("Limit register pressure of scheduled loop"));

186

190 cl::desc("Margin representing the unused percentage of "

191 "the register pressure limit"));

192

195 cl::desc("Use the MVE code generator for software pipelining"));

196

197

198

200 "pipeliner-max-num-stores",

201 cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden,

203

204

208 cl::desc("Enable CopyToPhi DAG Mutation"));

209

210

211

213 "pipeliner-force-issue-width",

214 cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,

216

217

220 cl::desc("Set how to use window scheduling algorithm."),

222 "Turn off window algorithm."),

224 "Use window algorithm after SMS algorithm fails."),

226 "Use window algorithm instead of SMS algorithm.")));

227

228unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;

230#ifndef NDEBUG

232#endif

234

236 "Modulo Software Pipelining", false, false)

243

244namespace {

245

246

247

251

252

254

255

257

259

260

262

264

266

268

269private:

271};

272

273

274

275

277

278 enum class InstrTag {

279 Barrier = 0,

280 LoadOrStore = 1,

281

282 FPExceptions = 2,

283

284 };

285

287 TaggedSUnit(SUnit *SU, InstrTag Tag)

289

290 InstrTag getTag() const { return InstrTag(getInt()); }

291 };

292

293

294 struct LoadStoreChunk {

297

298 void append(SUnit *SU);

299 };

300

303 std::vector &SUnits;

304

305

306 const unsigned N;

307

308

309 std::vector LoopCarried;

310

311

312

313

314

315

316

317

318

319

320

321

322 std::vector TaggedSUnits;

323

326

327public:

331

332

334

336 return LoopCarried[Idx];

337 }

338

339private:

340

341 std::optional getInstrTag(SUnit *SU) const;

342

343 void addLoopCarriedDepenenciesForChunks(const LoadStoreChunk &From,

344 const LoadStoreChunk &To);

345

346

347

348

349

350

351

352

355 bool PerformCheapCheck = false);

356

357 void computeDependenciesAux();

358};

359

360}

361

362

365 return false;

366

368 return false;

369

372 return false;

373

375 return false;

376

377

378

382 return false;

383

384 MF = &mf;

388 TII = MF->getSubtarget().getInstrInfo();

390

391 for (const auto &L : *MLI)

392 scheduleLoop(*L);

393

394 return false;

395}

396

397

398

399

400

401bool MachinePipeliner::scheduleLoop(MachineLoop &L) {

403 for (const auto &InnerLoop : L)

404 Changed |= scheduleLoop(*InnerLoop);

405

406#ifndef NDEBUG

407

409 if (Limit >= 0) {

413 }

414#endif

415

416 setPragmaPipelineOptions(L);

417 if (!canPipelineLoop(L)) {

418 LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");

421 L.getStartLoc(), L.getHeader())

422 << "Failed to pipeline loop";

423 });

424

425 LI.LoopPipelinerInfo.reset();

427 }

428

429 ++NumTrytoPipeline;

430 if (useSwingModuloScheduler())

431 Changed = swingModuloScheduler(L);

432

433 if (useWindowScheduler(Changed))

434 Changed = runWindowScheduler(L);

435

436 LI.LoopPipelinerInfo.reset();

438}

439

440void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {

441

444

445 MachineBasicBlock *LBLK = L.getTopBlock();

446

447 if (LBLK == nullptr)

448 return;

449

451 if (BBLK == nullptr)

452 return;

453

455 if (TI == nullptr)

456 return;

457

458 MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);

459 if (LoopID == nullptr)

460 return;

461

464

467

468 if (MD == nullptr)

469 continue;

470

472

473 if (S == nullptr)

474 continue;

475

476 if (S->getString() == "llvm.loop.pipeline.initiationinterval") {

478 "Pipeline initiation interval hint metadata should have two operands.");

481 assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");

482 } else if (S->getString() == "llvm.loop.pipeline.disable") {

484 }

485 }

486}

487

488

489

493

494

495 auto It = PhiDeps.find(Reg);

496 if (It == PhiDeps.end())

497 return false;

498

500 return true;

502 return false;

503

506

507 for (unsigned Dep : It->second) {

509 return true;

510 }

511

513 return false;

514}

515

519

520

522 unsigned DefReg = MI.getOperand(0).getReg();

523 auto Ins = PhiDeps.try_emplace(DefReg).first;

524

525

526 for (unsigned I = 1; I < MI.getNumOperands(); I += 2)

527 Ins->second.push_back(MI.getOperand(I).getReg());

528 }

529

530

532

533

534 for (const auto &KV : PhiDeps) {

535 unsigned Reg = KV.first;

537 return true;

538 }

539

540 return false;

541}

542

543

544

545

546bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {

547 if (L.getNumBlocks() != 1) {

548 ORE->emit([&]() {

549 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",

550 L.getStartLoc(), L.getHeader())

551 << "Not a single basic block: "

552 << ore::NV("NumBlocks", L.getNumBlocks());

553 });

554 return false;

555 }

556

558 LLVM_DEBUG(dbgs() << "Cannot pipeline loop due to PHI cycle\n");

559 return false;

560 }

561

563 ORE->emit([&]() {

564 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",

565 L.getStartLoc(), L.getHeader())

566 << "Disabled by Pragma.";

567 });

568 return false;

569 }

570

571

572

573 LI.TBB = nullptr;

574 LI.FBB = nullptr;

575 LI.BrCond.clear();

576 if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {

577 LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");

578 NumFailBranch++;

579 ORE->emit([&]() {

580 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",

581 L.getStartLoc(), L.getHeader())

582 << "The branch can't be understood";

583 });

584 return false;

585 }

586

587 LI.LoopInductionVar = nullptr;

588 LI.LoopCompare = nullptr;

589 LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());

590 if (LI.LoopPipelinerInfo) {

591 LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");

592 NumFailLoop++;

593 ORE->emit([&]() {

594 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",

595 L.getStartLoc(), L.getHeader())

596 << "The loop structure is not supported";

597 });

598 return false;

599 }

600

601 if (L.getLoopPreheader()) {

602 LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");

603 NumFailPreheader++;

604 ORE->emit([&]() {

605 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",

606 L.getStartLoc(), L.getHeader())

607 << "No loop preheader found";

608 });

609 return false;

610 }

611

612 unsigned NumStores = 0;

613 for (MachineInstr &MI : *L.getHeader())

614 if (MI.mayStore())

615 ++NumStores;

618 NumFailTooManyStores++;

619 ORE->emit([&]() {

620 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",

621 L.getStartLoc(), L.getHeader())

622 << "Too many store instructions in the loop: "

623 << ore::NV("NumStores", NumStores) << " > "

625 });

626 return false;

627 }

628

629

630 preprocessPhiNodes(*L.getHeader());

631 return true;

632}

633

635 MachineRegisterInfo &MRI = MF->getRegInfo();

636 SlotIndexes &Slots =

638

639 for (MachineInstr &PI : B.phis()) {

640 MachineOperand &DefOp = PI.getOperand(0);

642 auto *RC = MRI.getRegClass(DefOp.getReg());

643

644 for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {

645 MachineOperand &RegOp = PI.getOperand(i);

647 continue;

648

649

650

651 Register NewReg = MRI.createVirtualRegister(RC);

652 MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();

655 auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)

659 RegOp.setReg(NewReg);

661 }

662 }

663}

664

665

666

667

668

669bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {

670 assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");

671

673 SwingSchedulerDAG SMS(

676

677 MachineBasicBlock *MBB = L.getHeader();

678

679

680 SMS.startBlock(MBB);

681

682

683

688 ;

689

691 SMS.schedule();

692 SMS.exitRegion();

693

694 SMS.finishBlock();

695 return SMS.hasNewSchedule();

696}

697

708

709bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {

711 Context.MF = MF;

712 Context.MLI = MLI;

713 Context.MDT = MDT;

717 Context.RegClassInfo->runOnMachineFunction(*MF);

719 return WS.run();

720}

721

722bool MachinePipeliner::useSwingModuloScheduler() {

723

725}

726

727bool MachinePipeliner::useWindowScheduler(bool Changed) {

728

729

730

731

733 LLVM_DEBUG(dbgs() << "Window scheduling is disabled when "

734 "llvm.loop.pipeline.initiationinterval is set.\n");

735 return false;

736 }

737

740}

741

742void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {

745 else if (II_setByPragma > 0)

746 MII = II_setByPragma;

747 else

748 MII = std::max(ResMII, RecMII);

749}

750

751void SwingSchedulerDAG::setMAX_II() {

754 else if (II_setByPragma > 0)

755 MAX_II = II_setByPragma;

756 else

758}

759

760

761

765 updatePhiDependences();

766 Topo.InitDAGTopologicalSorting();

767 changeDependences();

768 postProcessDAG();

769 DDG = std::make_unique(SUnits, &EntrySU, &ExitSU, LCE);

772 dbgs() << "===== Loop Carried Edges Begin =====\n";

775 dbgs() << "===== Loop Carried Edges End =====\n";

776 });

777

778 NodeSetType NodeSets;

779 findCircuits(NodeSets);

780 NodeSetType Circuits = NodeSets;

781

782

783 unsigned ResMII = calculateResMII();

784 unsigned RecMII = calculateRecMII(NodeSets);

785

786 fuseRecs(NodeSets);

787

788

790 RecMII = 0;

791

792 setMII(ResMII, RecMII);

793 setMAX_II();

794

795 LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II

796 << " (rec=" << RecMII << ", res=" << ResMII << ")\n");

797

798

799 if (MII == 0) {

800 LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");

801 NumFailZeroMII++;

802 Pass.ORE->emit([&]() {

804 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

805 << "Invalid Minimal Initiation Interval: 0";

806 });

807 return;

808 }

809

810

813 << ", we don't pipeline large loops\n");

814 NumFailLargeMaxMII++;

815 Pass.ORE->emit([&]() {

817 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

818 << "Minimal Initiation Interval too large: "

819 << ore::NV("MII", (int)MII) << " > "

821 << "Refer to -pipeliner-max-mii.";

822 });

823 return;

824 }

825

826 computeNodeFunctions(NodeSets);

827

828 registerPressureFilter(NodeSets);

829

830 colocateNodeSets(NodeSets);

831

832 checkNodeSets(NodeSets);

833

835 for (auto &I : NodeSets) {

836 dbgs() << " Rec NodeSet ";

837 I.dump();

838 }

839 });

840

842

843 groupRemainingNodes(NodeSets);

844

845 removeDuplicateNodes(NodeSets);

846

848 for (auto &I : NodeSets) {

849 dbgs() << " NodeSet ";

850 I.dump();

851 }

852 });

853

854 computeNodeOrder(NodeSets);

855

856

857 checkValidNodeOrder(Circuits);

858

860 Scheduled = schedulePipeline(Schedule);

861

862 if (!Scheduled){

864 NumFailNoSchedule++;

865 Pass.ORE->emit([&]() {

867 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

868 << "Unable to find schedule";

869 });

870 return;

871 }

872

874

875 if (numStages == 0) {

876 LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");

877 NumFailZeroStage++;

878 Pass.ORE->emit([&]() {

880 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

881 << "No need to pipeline - no overlapped iterations in schedule.";

882 });

883 return;

884 }

885

888 << " : too many stages, abort\n");

889 NumFailLargeMaxStage++;

890 Pass.ORE->emit([&]() {

892 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

893 << "Too many stages in schedule: "

894 << ore::NV("numStages", (int)numStages) << " > "

896 << ". Refer to -pipeliner-max-stages.";

897 });

898 return;

899 }

900

901 Pass.ORE->emit([&]() {

903 Loop.getHeader())

904 << "Pipelined succesfully!";

905 });

906

907

909 std::vector<MachineInstr *> OrderedInsts;

913 OrderedInsts.push_back(SU->getInstr());

914 Cycles[SU->getInstr()] = Cycle;

915 Stages[SU->getInstr()] = Schedule.stageScheduled(SU);

916 }

917 }

919 for (auto &KV : NewMIs) {

920 Cycles[KV.first] = Cycles[KV.second];

921 Stages[KV.first] = Stages[KV.second];

922 NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];

923 }

924

925 ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),

926 std::move(Stages));

929 "Cannot serialize a schedule with InstrChanges!");

932 return;

933 }

934

939 LoopPipelinerInfo->isMVEExpanderSupported() &&

943 } else {

947 }

948 ++NumPipelined;

949}

950

951

953 for (auto &KV : NewMIs)

954 MF.deleteMachineInstr(KV.second);

955 NewMIs.clear();

956

957

959}

960

961

962

965 assert(Phi.isPHI() && "Expecting a Phi.");

966

969 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

970 if (Phi.getOperand(i + 1).getMBB() != Loop)

971 InitVal = Phi.getOperand(i).getReg();

972 else

973 LoopVal = Phi.getOperand(i).getReg();

974

975 assert(InitVal && LoopVal && "Unexpected Phi structure.");

976}

977

978

981 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

982 if (Phi.getOperand(i + 1).getMBB() == LoopBB)

983 return Phi.getOperand(i).getReg();

985}

986

987

992 while (!Worklist.empty()) {

994 for (const auto &SI : SU->Succs) {

995 SUnit *SuccSU = SI.getSUnit();

997 if (Visited.count(SuccSU))

998 continue;

999 if (SuccSU == SUb)

1000 return true;

1002 Visited.insert(SuccSU);

1003 }

1004 }

1005 }

1006 return false;

1007}

1008

1010 if (!getUnderlyingObjects())

1011 return;

1015 break;

1016 }

1017}

1018

1021

1022

1024 return false;

1027 return false;

1028 return true;

1029}

1030

1031

1032

1033

1034

1035bool SUnitWithMemInfo::getUnderlyingObjects() {

1037 if (MI->hasOneMemOperand())

1038 return false;

1041 return false;

1045

1046

1047

1049 return true;

1050}

1051

1052

1053

1054static bool

1059 if (Src.isTriviallyDisjoint(Dst))

1060 return false;

1062 return false;

1063

1066 if (PerformCheapCheck) {

1067

1068

1069

1070

1071

1073 int64_t Offset1, Offset2;

1074 bool Offset1IsScalable, Offset2IsScalable;

1075 if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,

1077 TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,

1080 Offset1IsScalable == Offset2IsScalable &&

1081 (int)Offset1 < (int)Offset2) {

1082 assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&

1083 "What happened to the chain edge?");

1084 return true;

1085 }

1086 }

1087 }

1088

1090 return false;

1091

1092

1093

1094

1095 if (Src.isUnknown() || Dst.isUnknown())

1096 return true;

1097 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)

1098 return true;

1099

1103 return false;

1104

1105

1106

1107

1108 for (const Value *SrcObj : Src.UnderlyingObjs)

1109 for (const Value *DstObj : Dst.UnderlyingObjs)

1112 return true;

1113

1114 return false;

1115}

1116

1117void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {

1118 const MachineInstr *MI = SU->getInstr();

1119 if (MI->mayLoadOrStore())

1120 return;

1121 (MI->mayStore() ? Stores : Loads).emplace_back(SU);

1122}

1123

1127 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.size()),

1128 LoopCarried(N, BitVector(N)), TII(TII), TRI(TRI) {}

1129

1131

1132 for (auto &SU : SUnits) {

1133 auto Tagged = getInstrTag(&SU);

1134

1135

1136 if (!Tagged)

1137 continue;

1138 TaggedSUnits.emplace_back(&SU, *Tagged);

1139 }

1140

1141 computeDependenciesAux();

1142}

1143

1144std::optionalLoopCarriedOrderDepsTracker::InstrTag

1145LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {

1147 if (TII->isGlobalMemoryObject(MI))

1148 return InstrTag::Barrier;

1149

1150 if (MI->mayStore() ||

1151 (MI->mayLoad() && MI->isDereferenceableInvariantLoad()))

1152 return InstrTag::LoadOrStore;

1153

1154 if (MI->mayRaiseFPException())

1155 return InstrTag::FPExceptions;

1156

1157 return std::nullopt;

1158}

1159

1160void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(

1161 const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,

1162 bool PerformCheapCheck) {

1163

1164 if (Src.SU == Dst.SU)

1165 return;

1166

1168 LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);

1169}

1170

1171void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(

1172 const LoadStoreChunk &From, const LoadStoreChunk &To) {

1173

1174 for (const SUnitWithMemInfo &Src : From.Loads)

1175 for (const SUnitWithMemInfo &Dst : To.Stores)

1176

1177 addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);

1178

1179

1180 for (const SUnitWithMemInfo &Src : From.Stores)

1181 for (const SUnitWithMemInfo &Dst : To.Loads)

1182 addDependenciesBetweenSUs(Src, Dst);

1183

1184

1185 for (const SUnitWithMemInfo &Src : From.Stores)

1186 for (const SUnitWithMemInfo &Dst : To.Stores)

1187 addDependenciesBetweenSUs(Src, Dst);

1188}

1189

1190void LoopCarriedOrderDepsTracker::computeDependenciesAux() {

1192 for (const auto &TSU : TaggedSUnits) {

1193 InstrTag Tag = TSU.getTag();

1194 SUnit *SU = TSU.getPointer();

1195 switch (Tag) {

1196 case InstrTag::Barrier:

1197 Chunks.emplace_back();

1198 break;

1199 case InstrTag::LoadOrStore:

1200 Chunks.back().append(SU);

1201 break;

1202 case InstrTag::FPExceptions:

1203

1204 break;

1205 }

1206 }

1207

1208

1209

1210

1211 for (const LoadStoreChunk &Chunk : Chunks)

1212 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);

1213

1214

1215

1216

1217}

1218

1219

1220

1221

1222

1223

1224LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {

1225 LoopCarriedEdges LCE;

1226

1227

1229 LCODTracker.computeDependencies();

1230 for (unsigned I = 0; I != SUnits.size(); I++)

1231 for (const int Succ : LCODTracker.getLoopCarried(I).set_bits())

1233

1235 return LCE;

1236}

1237

1238

1239

1240

1241

1242

1243

1244void SwingSchedulerDAG::updatePhiDependences() {

1246 const TargetSubtargetInfo &ST = MF.getSubtarget();

1247

1248

1249 for (SUnit &I : SUnits) {

1250 RemoveDeps.clear();

1251

1254 MachineInstr *MI = I.getInstr();

1255

1256 for (const MachineOperand &MO : MI->operands()) {

1257 if (!MO.isReg())

1258 continue;

1260 if (MO.isDef()) {

1261

1263 UI = MRI.use_instr_begin(Reg),

1264 UE = MRI.use_instr_end();

1265 UI != UE; ++UI) {

1266 MachineInstr *UseMI = &*UI;

1267 SUnit *SU = getSUnit(UseMI);

1268 if (SU != nullptr && UseMI->isPHI()) {

1269 if (MI->isPHI()) {

1271 Dep.setLatency(1);

1272 I.addPred(Dep);

1273 } else {

1274 HasPhiDef = Reg;

1275

1276

1277

1278

1279

1280

1281

1282

1283

1284

1285

1286

1287

1290 }

1291 }

1292 }

1293 } else if (MO.isUse()) {

1294

1295 MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);

1296 if (DefMI == nullptr)

1297 continue;

1298 SUnit *SU = getSUnit(DefMI);

1299 if (SU != nullptr && DefMI->isPHI()) {

1300 if (MI->isPHI()) {

1302 Dep.setLatency(0);

1303 ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,

1304 &SchedModel);

1305 I.addPred(Dep);

1306 } else {

1307 HasPhiUse = Reg;

1308

1309

1310 if (SU->NodeNum < I.NodeNum && I.isPred(SU))

1312 }

1313 }

1314 }

1315 }

1316

1318 continue;

1319 for (auto &PI : I.Preds) {

1320 MachineInstr *PMI = PI.getSUnit()->getInstr();

1322 if (I.getInstr()->isPHI()) {

1324 continue;

1326 continue;

1327 }

1329 }

1330 }

1331 for (const SDep &D : RemoveDeps)

1332 I.removePred(D);

1333 }

1334}

1335

1336

1337

1338void SwingSchedulerDAG::changeDependences() {

1339

1340

1341

1342 for (SUnit &I : SUnits) {

1343 unsigned BasePos = 0, OffsetPos = 0;

1345 int64_t NewOffset = 0;

1346 if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,

1347 NewOffset))

1348 continue;

1349

1350

1351 Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();

1352 MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);

1354 continue;

1355 SUnit *DefSU = getSUnit(DefMI);

1356 if (!DefSU)

1357 continue;

1358

1359 MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);

1360 if (!LastMI)

1361 continue;

1362 SUnit *LastSU = getSUnit(LastMI);

1363 if (!LastSU)

1364 continue;

1365

1366 if (Topo.IsReachable(&I, LastSU))

1367 continue;

1368

1369

1371 for (const SDep &P : I.Preds)

1372 if (P.getSUnit() == DefSU)

1374 for (const SDep &D : Deps) {

1375 Topo.RemovePred(&I, D.getSUnit());

1376 I.removePred(D);

1377 }

1378

1379 Deps.clear();

1380 for (auto &P : LastSU->Preds)

1381 if (P.getSUnit() == &I && P.getKind() == SDep::Order)

1382 Deps.push_back(P);

1383 for (const SDep &D : Deps) {

1384 Topo.RemovePred(LastSU, D.getSUnit());

1386 }

1387

1388

1389

1391 Topo.AddPred(LastSU, &I);

1393

1394

1395

1396 InstrChanges[&I] = std::make_pair(NewBase, NewOffset);

1397 }

1398}

1399

1400

1401

1402

1403

1404

1407 std::vector<MachineInstr *> &OrderedInsts,

1410

1411

1414 for (int Stage = 0, LastStage = Schedule.getMaxStageCount();

1415 Stage <= LastStage; ++Stage) {

1418 Instrs[Cycle].push_front(SU);

1419 }

1420 }

1421 }

1422

1425 std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];

1427 for (SUnit *SU : CycleInstrs) {

1429 OrderedInsts.push_back(MI);

1431 }

1432 }

1433}

1434

1435namespace {

1436

1437

1438

1439struct FuncUnitSorter {

1440 const InstrItineraryData *InstrItins;

1441 const MCSubtargetInfo *STI;

1442 DenseMap<InstrStage::FuncUnits, unsigned> Resources;

1443

1444 FuncUnitSorter(const TargetSubtargetInfo &TSI)

1445 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}

1446

1447

1448

1449

1450 unsigned minFuncUnits(const MachineInstr *Inst,

1453 unsigned min = UINT_MAX;

1454 if (InstrItins && !InstrItins->isEmpty()) {

1455 for (const InstrStage &IS :

1457 InstrItins->endStage(SchedClass))) {

1459 unsigned numAlternatives = llvm::popcount(funcUnits);

1460 if (numAlternatives < min) {

1461 min = numAlternatives;

1462 F = funcUnits;

1463 }

1464 }

1465 return min;

1466 }

1468 const MCSchedClassDesc *SCDesc =

1471

1472

1473 return min;

1474

1475 for (const MCWriteProcResEntry &PRE :

1478 if (!PRE.ReleaseAtCycle)

1479 continue;

1480 const MCProcResourceDesc *ProcResource =

1482 unsigned NumUnits = ProcResource->NumUnits;

1483 if (NumUnits < min) {

1484 min = NumUnits;

1485 F = PRE.ProcResourceIdx;

1486 }

1487 }

1488 return min;

1489 }

1490 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

1491 }

1492

1493

1494

1495

1496

1497

1498 void calcCriticalResources(MachineInstr &MI) {

1499 unsigned SchedClass = MI.getDesc().getSchedClass();

1500 if (InstrItins && !InstrItins->isEmpty()) {

1501 for (const InstrStage &IS :

1503 InstrItins->endStage(SchedClass))) {

1506 Resources[FuncUnits]++;

1507 }

1508 return;

1509 }

1511 const MCSchedClassDesc *SCDesc =

1514

1515

1516 return;

1517

1518 for (const MCWriteProcResEntry &PRE :

1521 if (!PRE.ReleaseAtCycle)

1522 continue;

1523 Resources[PRE.ProcResourceIdx]++;

1524 }

1525 return;

1526 }

1527 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

1528 }

1529

1530

1531 bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {

1533 unsigned MFUs1 = minFuncUnits(IS1, F1);

1534 unsigned MFUs2 = minFuncUnits(IS2, F2);

1535 if (MFUs1 == MFUs2)

1536 return Resources.lookup(F1) < Resources.lookup(F2);

1537 return MFUs1 > MFUs2;

1538 }

1539};

1540

1541

1542class HighRegisterPressureDetector {

1543 MachineBasicBlock *OrigMBB;

1544 const MachineRegisterInfo &MRI;

1545 const TargetRegisterInfo *TRI;

1546

1547 const unsigned PSetNum;

1548

1549

1550

1551

1552

1553 std::vector InitSetPressure;

1554

1555

1556

1557 std::vector PressureSetLimit;

1558

1559 DenseMap<MachineInstr *, RegisterOperands> ROMap;

1560

1561 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;

1562

1563public:

1564 using OrderedInstsTy = std::vector<MachineInstr *>;

1565 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;

1566

1567private:

1568 static void dumpRegisterPressures(const std::vector &Pressures) {

1569 if (Pressures.size() == 0) {

1570 dbgs() << "[]";

1571 } else {

1573 for (unsigned P : Pressures) {

1576 }

1577 dbgs() << ']';

1578 }

1579 }

1580

1583

1584 VirtRegOrUnit VRegOrUnit =

1586 : VirtRegOrUnit(static_cast(Reg.id()));

1587 for (auto PSetIter = MRI.getPressureSets(VRegOrUnit); PSetIter.isValid();

1588 ++PSetIter) {

1589 dbgs() << *PSetIter << ' ';

1590 }

1591 dbgs() << '\n';

1592 }

1593

1594 void increaseRegisterPressure(std::vector &Pressure,

1596

1597 VirtRegOrUnit VRegOrUnit =

1599 : VirtRegOrUnit(static_cast(Reg.id()));

1600 auto PSetIter = MRI.getPressureSets(VRegOrUnit);

1601 unsigned Weight = PSetIter.getWeight();

1602 for (; PSetIter.isValid(); ++PSetIter)

1603 Pressure[*PSetIter] += Weight;

1604 }

1605

1606 void decreaseRegisterPressure(std::vector &Pressure,

1608 auto PSetIter = MRI.getPressureSets(VirtRegOrUnit(Reg));

1609 unsigned Weight = PSetIter.getWeight();

1610 for (; PSetIter.isValid(); ++PSetIter) {

1611 auto &P = Pressure[*PSetIter];

1613 "register pressure must be greater than or equal weight");

1614 P -= Weight;

1615 }

1616 }

1617

1618

1619 bool isReservedRegister(Register Reg) const {

1621 }

1622

1623 bool isDefinedInThisLoop(Register Reg) const {

1624 return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;

1625 }

1626

1627

1628

1629

1630

1631

1632

1633

1634

1635 void computeLiveIn() {

1636 DenseSet Used;

1637 for (auto &MI : *OrigMBB) {

1638 if (MI.isDebugInstr())

1639 continue;

1640 for (auto &Use : ROMap[&MI].Uses) {

1641

1643 Use.VRegOrUnit.isVirtualReg()

1644 ? Use.VRegOrUnit.asVirtualReg()

1645 : Register(static_cast<unsigned>(Use.VRegOrUnit.asMCRegUnit()));

1646

1647

1649 continue;

1650 if (isReservedRegister(Reg))

1651 continue;

1652 if (isDefinedInThisLoop(Reg))

1653 continue;

1655 }

1656 }

1657

1658 for (auto LiveIn : Used)

1659 increaseRegisterPressure(InitSetPressure, LiveIn);

1660 }

1661

1662

1663 void computePressureSetLimit(const RegisterClassInfo &RCI) {

1664 for (unsigned PSet = 0; PSet < PSetNum; PSet++)

1666 }

1667

1668

1669

1670

1671

1672

1673

1674

1675

1676

1677

1678

1679 Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,

1680 Instr2StageTy &Stages) const {

1681

1682

1683

1684

1685 DenseSet TargetRegs;

1686 const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {

1687 if (isDefinedInThisLoop(Reg))

1689 };

1690 for (MachineInstr *MI : OrderedInsts) {

1691 if (MI->isPHI()) {

1693 UpdateTargetRegs(Reg);

1694 } else {

1695 for (auto &Use : ROMap.find(MI)->getSecond().Uses) {

1696

1698 ? Use.VRegOrUnit.asVirtualReg()

1699 : Register(static_cast<unsigned>(

1700 Use.VRegOrUnit.asMCRegUnit()));

1701 UpdateTargetRegs(Reg);

1702 }

1703 }

1704 }

1705

1706 const auto InstrScore = [&Stages](MachineInstr *MI) {

1707 return Stages[MI] + MI->isPHI();

1708 };

1709

1710 DenseMap<Register, MachineInstr *> LastUseMI;

1712 for (auto &Use : ROMap.find(MI)->getSecond().Uses) {

1713

1715 Use.VRegOrUnit.isVirtualReg()

1716 ? Use.VRegOrUnit.asVirtualReg()

1717 : Register(static_cast<unsigned>(Use.VRegOrUnit.asMCRegUnit()));

1719 continue;

1721 if (!Inserted) {

1722 MachineInstr *Orig = Ite->second;

1723 MachineInstr *New = MI;

1724 if (InstrScore(Orig) < InstrScore(New))

1725 Ite->second = New;

1726 }

1727 }

1728 }

1729

1730 Instr2LastUsesTy LastUses;

1731 for (auto [Reg, MI] : LastUseMI)

1732 LastUses[MI].insert(Reg);

1733 return LastUses;

1734 }

1735

1736

1737

1738

1739

1740

1741

1742

1743

1744

1745

1746

1747

1748 std::vector

1749 computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,

1750 Instr2StageTy &Stages,

1751 const unsigned StageCount) const {

1752 using RegSetTy = SmallDenseSet<Register, 16>;

1753

1754

1755

1757

1758 auto CurSetPressure = InitSetPressure;

1759 auto MaxSetPressure = InitSetPressure;

1760 auto LastUses = computeLastUses(OrderedInsts, Stages);

1761

1763 dbgs() << "Ordered instructions:\n";

1764 for (MachineInstr *MI : OrderedInsts) {

1765 dbgs() << "Stage " << Stages[MI] << ": ";

1767 }

1768 });

1769

1770 const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,

1771 VirtRegOrUnit VRegOrUnit) {

1772

1778 return;

1779

1780 bool Inserted = RegSet.insert(Reg).second;

1781 if (!Inserted)

1782 return;

1783

1785 increaseRegisterPressure(CurSetPressure, Reg);

1787 };

1788

1789 const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,

1792 return;

1793

1794

1795 if (!RegSet.contains(Reg))

1796 return;

1797

1799 RegSet.erase(Reg);

1800 decreaseRegisterPressure(CurSetPressure, Reg);

1802 };

1803

1804 for (unsigned I = 0; I < StageCount; I++) {

1805 for (MachineInstr *MI : OrderedInsts) {

1806 const auto Stage = Stages[MI];

1807 if (I < Stage)

1808 continue;

1809

1810 const unsigned Iter = I - Stage;

1811

1812 for (auto &Def : ROMap.find(MI)->getSecond().Defs)

1813 InsertReg(LiveRegSets[Iter], Def.VRegOrUnit);

1814

1815 for (auto LastUse : LastUses[MI]) {

1816 if (MI->isPHI()) {

1817 if (Iter != 0)

1818 EraseReg(LiveRegSets[Iter - 1], LastUse);

1819 } else {

1820 EraseReg(LiveRegSets[Iter], LastUse);

1821 }

1822 }

1823

1824 for (unsigned PSet = 0; PSet < PSetNum; PSet++)

1825 MaxSetPressure[PSet] =

1826 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);

1827

1829 dbgs() << "CurSetPressure=";

1830 dumpRegisterPressures(CurSetPressure);

1831 dbgs() << " iter=" << Iter << " stage=" << Stage << ":";

1833 });

1834 }

1835 }

1836

1837 return MaxSetPressure;

1838 }

1839

1840public:

1841 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,

1842 const MachineFunction &MF)

1843 : OrigMBB(OrigMBB), MRI(MF.getRegInfo()),

1844 TRI(MF.getSubtarget().getRegisterInfo()),

1845 PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),

1846 PressureSetLimit(PSetNum, 0) {}

1847

1848

1849

1850 void init(const RegisterClassInfo &RCI) {

1851 for (MachineInstr &MI : *OrigMBB) {

1852 if (MI.isDebugInstr())

1853 continue;

1854 ROMap[&MI].collect(MI, *TRI, MRI, false, true);

1855 }

1856

1857 computeLiveIn();

1858 computePressureSetLimit(RCI);

1859 }

1860

1861

1862

1863 bool detect(const SwingSchedulerDAG *SSD, SMSchedule &Schedule,

1864 const unsigned MaxStage) const {

1866 "the percentage of the margin must be between 0 to 100");

1867

1868 OrderedInstsTy OrderedInsts;

1869 Instr2StageTy Stages;

1871 const auto MaxSetPressure =

1872 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);

1873

1875 dbgs() << "Dump MaxSetPressure:\n";

1876 for (unsigned I = 0; I < MaxSetPressure.size(); I++) {

1877 dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);

1878 }

1879 dbgs() << '\n';

1880 });

1881

1882 for (unsigned PSet = 0; PSet < PSetNum; PSet++) {

1883 unsigned Limit = PressureSetLimit[PSet];

1885 LLVM_DEBUG(dbgs() << "PSet=" << PSet << " Limit=" << Limit

1886 << " Margin=" << Margin << "\n");

1887 if (Limit < MaxSetPressure[PSet] + Margin) {

1890 << "Rejected the schedule because of too high register pressure\n");

1891 return true;

1892 }

1893 }

1894 return false;

1895 }

1896};

1897

1898}

1899

1900

1901

1902

1903

1904

1905

1906unsigned SwingSchedulerDAG::calculateResMII() {

1908 ResourceManager RM(&MF.getSubtarget(), this);

1909 return RM.calculateResMII();

1910}

1911

1912

1913

1914

1915

1916

1917

1918unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {

1919 unsigned RecMII = 0;

1920

1921 for (NodeSet &Nodes : NodeSets) {

1922 if (Nodes.empty())

1923 continue;

1924

1925 unsigned Delay = Nodes.getLatency();

1926 unsigned Distance = 1;

1927

1928

1929 unsigned CurMII = (Delay + Distance - 1) / Distance;

1930 Nodes.setRecMII(CurMII);

1931 if (CurMII > RecMII)

1932 RecMII = CurMII;

1933 }

1934

1935 return RecMII;

1936}

1937

1938

1939void SwingSchedulerDAG::Circuits::createAdjacencyStructure(

1940 SwingSchedulerDAG *DAG) {

1941 BitVector Added(SUnits.size());

1942 DenseMap<int, int> OutputDeps;

1943 for (int i = 0, e = SUnits.size(); i != e; ++i) {

1945

1946 for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {

1947

1948

1949 if (OE.isOutputDep()) {

1950 int N = OE.getDst()->NodeNum;

1951 int BackEdge = i;

1952 auto Dep = OutputDeps.find(BackEdge);

1953 if (Dep != OutputDeps.end()) {

1954 BackEdge = Dep->second;

1955 OutputDeps.erase(Dep);

1956 }

1957 OutputDeps[N] = BackEdge;

1958 }

1959

1960 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())

1961 continue;

1962

1963

1964

1965

1966

1967

1968

1969 if (OE.isAntiDep())

1970 continue;

1971

1972 int N = OE.getDst()->NodeNum;

1973 if (Added.test(N)) {

1974 AdjK[i].push_back(N);

1976 }

1977 }

1978

1979

1980 for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {

1981 SUnit *Src = IE.getSrc();

1982 SUnit *Dst = IE.getDst();

1983 if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE))

1984 continue;

1985 if (IE.isOrderDep() && Src->getInstr()->mayLoad()) {

1986 int N = Src->NodeNum;

1987 if (Added.test(N)) {

1988 AdjK[i].push_back(N);

1990 }

1991 }

1992 }

1993 }

1994

1995 for (auto &OD : OutputDeps)

1996 if (Added.test(OD.second)) {

1997 AdjK[OD.first].push_back(OD.second);

1998 Added.set(OD.second);

1999 }

2000}

2001

2002

2003

2004bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,

2005 const SwingSchedulerDAG *DAG,

2006 bool HasBackedge) {

2007 SUnit *SV = &SUnits[V];

2008 bool F = false;

2009 Stack.insert(SV);

2010 Blocked.set(V);

2011

2012 for (auto W : AdjK[V]) {

2013 if (NumPaths > MaxPaths)

2014 break;

2015 if (W < S)

2016 continue;

2017 if (W == S) {

2018 if (!HasBackedge)

2020 F = true;

2021 ++NumPaths;

2022 break;

2023 }

2024 if (!Blocked.test(W)) {

2025 if (circuit(W, S, NodeSets, DAG,

2026 Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))

2027 F = true;

2028 }

2029 }

2030

2031 if (F)

2032 unblock(V);

2033 else {

2034 for (auto W : AdjK[V]) {

2035 if (W < S)

2036 continue;

2037 B[W].insert(SV);

2038 }

2039 }

2040 Stack.pop_back();

2041 return F;

2042}

2043

2044

2045void SwingSchedulerDAG::Circuits::unblock(int U) {

2046 Blocked.reset(U);

2047 SmallPtrSet<SUnit *, 4> &BU = B[U];

2048 while (!BU.empty()) {

2049 SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();

2050 assert(SI != BU.end() && "Invalid B set.");

2051 SUnit *W = *SI;

2053 if (Blocked.test(W->NodeNum))

2054 unblock(W->NodeNum);

2055 }

2056}

2057

2058

2059

2060void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {

2061 Circuits Cir(SUnits, Topo);

2062

2063 Cir.createAdjacencyStructure(this);

2064 for (int I = 0, E = SUnits.size(); I != E; ++I) {

2065 Cir.reset();

2066 Cir.circuit(I, I, NodeSets, this);

2067 }

2068}

2069

2070

2071

2072

2073

2074

2075

2076

2077

2078

2079

2080

2081

2082

2083

2084

2085

2086

2087

2088void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {

2089 for (SUnit &SU : DAG->SUnits) {

2090

2092 continue;

2093

2094

2096

2098

2099 for (auto &Dep : SU.Preds) {

2100 SUnit *TmpSU = Dep.getSUnit();

2101 MachineInstr *TmpMI = TmpSU->getInstr();

2103

2106

2107

2110 }

2111

2112 if (PHISUs.size() == 0 || SrcSUs.size() == 0)

2113 continue;

2114

2115

2116

2118

2119 for (size_t Index = 0; Index < PHISUs.size(); ++Index) {

2120 for (auto &Dep : PHISUs[Index]->Succs) {

2122 continue;

2123

2124 SUnit *TmpSU = Dep.getSUnit();

2125 MachineInstr *TmpMI = TmpSU->getInstr();

2128 continue;

2129 }

2131 }

2132 }

2133

2134 if (UseSUs.size() == 0)

2135 continue;

2136

2138

2139 for (auto *I : UseSUs) {

2140 for (auto *Src : SrcSUs) {

2141 if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {

2144 }

2145 }

2146 }

2147 }

2148}

2149

2150

2151

2152

2153

2154

2155

2156void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {

2157 ScheduleInfo.resize(SUnits.size());

2158

2160 for (int I : Topo) {

2161 const SUnit &SU = SUnits[I];

2162 dumpNode(SU);

2163 }

2164 });

2165

2166 int maxASAP = 0;

2167

2168 for (int I : Topo) {

2169 int asap = 0;

2170 int zeroLatencyDepth = 0;

2171 SUnit *SU = &SUnits[I];

2172 for (const auto &IE : DDG->getInEdges(SU)) {

2173 SUnit *Pred = IE.getSrc();

2174 if (IE.getLatency() == 0)

2175 zeroLatencyDepth =

2176 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);

2177 if (IE.ignoreDependence(true))

2178 continue;

2179 asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() -

2180 IE.getDistance() * MII));

2181 }

2182 maxASAP = std::max(maxASAP, asap);

2183 ScheduleInfo[I].ASAP = asap;

2184 ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;

2185 }

2186

2187

2189 int alap = maxASAP;

2190 int zeroLatencyHeight = 0;

2191 SUnit *SU = &SUnits[I];

2192 for (const auto &OE : DDG->getOutEdges(SU)) {

2193 SUnit *Succ = OE.getDst();

2195 continue;

2196 if (OE.getLatency() == 0)

2197 zeroLatencyHeight =

2198 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);

2199 if (OE.ignoreDependence(true))

2200 continue;

2201 alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() +

2202 OE.getDistance() * MII));

2203 }

2204

2205 ScheduleInfo[I].ALAP = alap;

2206 ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;

2207 }

2208

2209

2210 for (NodeSet &I : NodeSets)

2211 I.computeNodeSetInfo(this);

2212

2214 for (unsigned i = 0; i < SUnits.size(); i++) {

2215 dbgs() << "\tNode " << i << ":\n";

2216 dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";

2217 dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";

2218 dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";

2219 dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";

2220 dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";

2221 dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";

2222 dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";

2223 }

2224 });

2225}

2226

2227

2228

2229

2232 const NodeSet *S = nullptr) {

2234

2236 for (const auto &IE : DDG->getInEdges(SU)) {

2237 SUnit *PredSU = IE.getSrc();

2238 if (S && S->count(PredSU) == 0)

2239 continue;

2240 if (IE.ignoreDependence(true))

2241 continue;

2242 if (NodeOrder.count(PredSU) == 0)

2243 Preds.insert(PredSU);

2244 }

2245

2246

2247

2248

2249

2250 for (const auto &OE : DDG->getOutEdges(SU)) {

2251 SUnit *SuccSU = OE.getDst();

2252 if (!OE.isAntiDep())

2253 continue;

2254 if (S && S->count(SuccSU) == 0)

2255 continue;

2256 if (NodeOrder.count(SuccSU) == 0)

2257 Preds.insert(SuccSU);

2258 }

2259 }

2260 return !Preds.empty();

2261}

2262

2263

2264

2265

2268 const NodeSet *S = nullptr) {

2270

2272 for (const auto &OE : DDG->getOutEdges(SU)) {

2273 SUnit *SuccSU = OE.getDst();

2274 if (S && S->count(SuccSU) == 0)

2275 continue;

2276 if (OE.ignoreDependence(false))

2277 continue;

2278 if (NodeOrder.count(SuccSU) == 0)

2279 Succs.insert(SuccSU);

2280 }

2281

2282

2283

2284

2285

2286 for (const auto &IE : DDG->getInEdges(SU)) {

2287 SUnit *PredSU = IE.getSrc();

2288 if (!IE.isAntiDep())

2289 continue;

2290 if (S && S->count(PredSU) == 0)

2291 continue;

2292 if (NodeOrder.count(PredSU) == 0)

2293 Succs.insert(PredSU);

2294 }

2295 }

2296 return !Succs.empty();

2297}

2298

2299

2300

2307 return false;

2309 return false;

2310 if (DestNodes.contains(Cur))

2311 return true;

2312 if (!Visited.insert(Cur).second)

2313 return Path.contains(Cur);

2314 bool FoundPath = false;

2315 for (const auto &OE : DDG->getOutEdges(Cur))

2316 if (!OE.ignoreDependence(false))

2317 FoundPath |=

2318 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);

2319 for (const auto &IE : DDG->getInEdges(Cur))

2320 if (IE.isAntiDep() && IE.getDistance() == 0)

2321 FoundPath |=

2322 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);

2323 if (FoundPath)

2324 Path.insert(Cur);

2325 return FoundPath;

2326}

2327

2328

2329

2330

2337 for (SUnit *SU : NS) {

2339 if (MI->isPHI())

2340 continue;

2343 if (Reg.isVirtual())

2345 else if (MRI.isAllocatable(Reg))

2346 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))

2348 }

2349 }

2350 for (SUnit *SU : NS)

2352 if (!MO.isDead()) {

2354 if (Reg.isVirtual()) {

2358 } else if (MRI.isAllocatable(Reg)) {

2359 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))

2363 }

2364 }

2366}

2367

2368

2369

2370void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {

2371 for (auto &NS : NodeSets) {

2372

2373 if (NS.size() <= 2)

2374 continue;

2375 IntervalPressure RecRegPressure;

2376 RegPressureTracker RecRPTracker(RecRegPressure);

2377 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);

2379 RecRPTracker.closeBottom();

2380

2381 std::vector<SUnit *> SUnits(NS.begin(), NS.end());

2382 llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {

2383 return A->NodeNum > B->NodeNum;

2384 });

2385

2386 for (auto &SU : SUnits) {

2387

2388

2389

2390

2392 RecRPTracker.setPos(std::next(CurInstI));

2393

2394 RegPressureDelta RPDelta;

2396 RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,

2397 CriticalPSets,

2401 dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "

2404 NS.setExceedPressure(SU);

2405 break;

2406 }

2407 RecRPTracker.recede();

2408 }

2409 }

2410}

2411

2412

2413

2414void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {

2415 unsigned Colocate = 0;

2416 for (int i = 0, e = NodeSets.size(); i < e; ++i) {

2417 NodeSet &N1 = NodeSets[i];

2418 SmallSetVector<SUnit *, 8> S1;

2420 continue;

2421 for (int j = i + 1; j < e; ++j) {

2424 continue;

2425 SmallSetVector<SUnit *, 8> S2;

2426 if (N2.empty() || succ\_L(N2, S2, DDG.get()))

2427 continue;

2431 break;

2432 }

2433 }

2434 }

2435}

2436

2437

2438

2439

2440

2441

2442void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {

2443

2444 if (MII < 17)

2445 return;

2446

2447 for (auto &NS : NodeSets) {

2448 if (NS.getRecMII() > 2)

2449 return;

2450 if (NS.getMaxDepth() > MII)

2451 return;

2452 }

2453 NodeSets.clear();

2454 LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");

2455}

2456

2457

2458

2459void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {

2460 SetVector<SUnit *> NodesAdded;

2461 SmallPtrSet<SUnit *, 8> Visited;

2462

2463

2464 for (NodeSet &I : NodeSets) {

2465 SmallSetVector<SUnit *, 8> N;

2466

2467 if (succ_L(I, N, DDG.get())) {

2468 SetVector<SUnit *> Path;

2469 for (SUnit *NI : N) {

2470 Visited.clear();

2471 computePath(NI, Path, NodesAdded, I, Visited, DDG.get());

2472 }

2473 if (Path.empty())

2474 I.insert(Path.begin(), Path.end());

2475 }

2476

2477 N.clear();

2478 if (succ_L(NodesAdded, N, DDG.get())) {

2479 SetVector<SUnit *> Path;

2480 for (SUnit *NI : N) {

2481 Visited.clear();

2482 computePath(NI, Path, I, NodesAdded, Visited, DDG.get());

2483 }

2484 if (Path.empty())

2485 I.insert(Path.begin(), Path.end());

2486 }

2488 }

2489

2490

2491

2493 SmallSetVector<SUnit *, 8> N;

2494 if (succ_L(NodesAdded, N, DDG.get()))

2495 for (SUnit *I : N)

2496 addConnectedNodes(I, NewSet, NodesAdded);

2497 if (!NewSet.empty())

2498 NodeSets.push_back(NewSet);

2499

2500

2501

2503 if (pred_L(NodesAdded, N, DDG.get()))

2504 for (SUnit *I : N)

2505 addConnectedNodes(I, NewSet, NodesAdded);

2506 if (!NewSet.empty())

2507 NodeSets.push_back(NewSet);

2508

2509

2510

2511 for (SUnit &SU : SUnits) {

2512 if (NodesAdded.count(&SU) == 0) {

2514 addConnectedNodes(&SU, NewSet, NodesAdded);

2515 if (!NewSet.empty())

2516 NodeSets.push_back(NewSet);

2517 }

2518 }

2519}

2520

2521

2522void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,

2523 SetVector<SUnit *> &NodesAdded) {

2525 NodesAdded.insert(SU);

2526 for (auto &OE : DDG->getOutEdges(SU)) {

2528 if (!OE.isArtificial() && Successor->isBoundaryNode() &&

2530 addConnectedNodes(Successor, NewSet, NodesAdded);

2531 }

2532 for (auto &IE : DDG->getInEdges(SU)) {

2533 SUnit *Predecessor = IE.getSrc();

2534 if (IE.isArtificial() && NodesAdded.count(Predecessor) == 0)

2535 addConnectedNodes(Predecessor, NewSet, NodesAdded);

2536 }

2537}

2538

2539

2540

2543 Result.clear();

2544 for (SUnit *SU : Set1) {

2545 if (Set2.count(SU) != 0)

2546 Result.insert(SU);

2547 }

2548 return !Result.empty();

2549}

2550

2551

2552void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {

2553 for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;

2554 ++I) {

2556 for (NodeSetType::iterator J = I + 1; J != E;) {

2561 for (SUnit *SU : *J)

2562 I->insert(SU);

2563 NodeSets.erase(J);

2564 E = NodeSets.end();

2565 } else {

2566 ++J;

2567 }

2568 }

2569 }

2570}

2571

2572

2573void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {

2574 for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;

2575 ++I)

2576 for (NodeSetType::iterator J = I + 1; J != E;) {

2577 J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });

2578

2579 if (J->empty()) {

2580 NodeSets.erase(J);

2581 E = NodeSets.end();

2582 } else {

2583 ++J;

2584 }

2585 }

2586}

2587

2588

2589

2590

2591

2592void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {

2593 SmallSetVector<SUnit *, 8> R;

2595

2596 for (auto &Nodes : NodeSets) {

2597 LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");

2598 OrderKind Order;

2599 SmallSetVector<SUnit *, 8> N;

2601 R.insert_range(N);

2606 R.insert_range(N);

2610

2611

2614 } else if (NodeSets.size() == 1) {

2615 for (const auto &N : Nodes)

2616 if (N->Succs.size() == 0)

2617 R.insert(N);

2620 } else {

2621

2622 SUnit *maxASAP = nullptr;

2623 for (SUnit *SU : Nodes) {

2624 if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||

2625 (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))

2626 maxASAP = SU;

2627 }

2628 R.insert(maxASAP);

2631 }

2632

2633 while (R.empty()) {

2634 if (Order == TopDown) {

2635

2636

2637

2638 while (R.empty()) {

2639 SUnit *maxHeight = nullptr;

2640 for (SUnit *I : R) {

2641 if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))

2642 maxHeight = I;

2643 else if (getHeight(I) == getHeight(maxHeight) &&

2644 getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))

2645 maxHeight = I;

2646 else if (getHeight(I) == getHeight(maxHeight) &&

2647 getZeroLatencyHeight(I) ==

2648 getZeroLatencyHeight(maxHeight) &&

2649 getMOV(I) < getMOV(maxHeight))

2650 maxHeight = I;

2651 }

2654 R.remove(maxHeight);

2655 for (const auto &OE : DDG->getOutEdges(maxHeight)) {

2656 SUnit *SU = OE.getDst();

2657 if (Nodes.count(SU) == 0)

2658 continue;

2660 continue;

2661 if (OE.ignoreDependence(false))

2662 continue;

2663 R.insert(SU);

2664 }

2665

2666

2667

2668

2669

2670 for (const auto &IE : DDG->getInEdges(maxHeight)) {

2671 SUnit *SU = IE.getSrc();

2672 if (IE.isAntiDep())

2673 continue;

2674 if (Nodes.count(SU) == 0)

2675 continue;

2677 continue;

2678 R.insert(SU);

2679 }

2680 }

2682 LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");

2683 SmallSetVector<SUnit *, 8> N;

2685 R.insert_range(N);

2686 } else {

2687

2688

2689

2690 while (R.empty()) {

2691 SUnit *maxDepth = nullptr;

2692 for (SUnit *I : R) {

2693 if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))

2694 maxDepth = I;

2695 else if (getDepth(I) == getDepth(maxDepth) &&

2696 getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))

2697 maxDepth = I;

2698 else if (getDepth(I) == getDepth(maxDepth) &&

2699 getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&

2700 getMOV(I) < getMOV(maxDepth))

2701 maxDepth = I;

2702 }

2705 R.remove(maxDepth);

2706 if (Nodes.isExceedSU(maxDepth)) {

2708 R.clear();

2709 R.insert(Nodes.getNode(0));

2710 break;

2711 }

2712 for (const auto &IE : DDG->getInEdges(maxDepth)) {

2713 SUnit *SU = IE.getSrc();

2714 if (Nodes.count(SU) == 0)

2715 continue;

2717 continue;

2718 R.insert(SU);

2719 }

2720

2721

2722

2723

2724

2725 for (const auto &OE : DDG->getOutEdges(maxDepth)) {

2726 SUnit *SU = OE.getDst();

2727 if (!OE.isAntiDep())

2728 continue;

2729 if (Nodes.count(SU) == 0)

2730 continue;

2732 continue;

2733 R.insert(SU);

2734 }

2735 }

2737 LLVM_DEBUG(dbgs() << "\n Switching order to top down ");

2738 SmallSetVector<SUnit *, 8> N;

2740 R.insert_range(N);

2741 }

2742 }

2744 }

2745

2747 dbgs() << "Node order: ";

2749 dbgs() << " " << I->NodeNum << " ";

2750 dbgs() << "\n";

2751 });

2752}

2753

2754

2755

2756bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {

2757

2759 LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );

2760 return false;

2761 }

2762

2763 bool scheduleFound = false;

2764 std::unique_ptr HRPDetector;

2766 HRPDetector =

2767 std::make_unique(Loop.getHeader(), MF);

2768 HRPDetector->init(RegClassInfo);

2769 }

2770

2771 for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {

2772 Schedule.reset();

2774 LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");

2775

2778 do {

2779 SUnit *SU = *NI;

2780

2781

2782

2783 int EarlyStart = INT_MIN;

2784 int LateStart = INT_MAX;

2785 Schedule.computeStart(SU, &EarlyStart, &LateStart, II, this);

2787 dbgs() << "\n";

2788 dbgs() << "Inst (" << SU->NodeNum << ") ";

2790 dbgs() << "\n";

2791 });

2793 dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));

2794

2795 if (EarlyStart > LateStart)

2796 scheduleFound = false;

2797 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)

2798 scheduleFound =

2799 Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);

2800 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)

2801 scheduleFound =

2802 Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);

2803 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {

2804 LateStart = std::min(LateStart, EarlyStart + (int)II - 1);

2805

2806

2807

2808

2809

2810

2813 scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);

2814 else

2815 scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);

2816 } else {

2818 scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),

2819 FirstCycle + getASAP(SU) + II - 1, II);

2820 }

2821

2822

2823

2824 if (scheduleFound)

2827 scheduleFound = false;

2828

2830 if (!scheduleFound)

2831 dbgs() << "\tCan't schedule\n";

2832 });

2833 } while (++NI != NE && scheduleFound);

2834

2835

2836

2837 if (scheduleFound)

2838 scheduleFound = DDG->isValidSchedule(Schedule);

2839

2840

2841 if (scheduleFound)

2842 scheduleFound =

2844

2845

2846 if (scheduleFound)

2848

2849

2850

2852 scheduleFound =

2853 !HRPDetector->detect(this, Schedule, Schedule.getMaxStageCount());

2854 }

2855

2856 LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound

2858 << ")\n");

2859

2860 if (scheduleFound) {

2861 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);

2862 if (!scheduleFound)

2864 }

2865

2866 if (scheduleFound) {

2868 Pass.ORE->emit([&]() {

2869 return MachineOptimizationRemarkAnalysis(

2870 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

2871 << "Schedule found with Initiation Interval: "

2873 << ", MaxStageCount: "

2875 });

2876 } else

2877 Schedule.reset();

2878

2880}

2881

2887 if (Reg.isVirtual())

2889 if (MRI.getVRegDef(Reg)->getParent() != MI.getParent())

2890 continue;

2891 if (Result)

2893 Result = Reg;

2894 }

2895 return Result;

2896}

2897

2898

2899

2901 if (Op.isReg() || Op.getReg().isVirtual())

2902 return false;

2903

2908

2913

2916

2917

2918

2919

2920

2921

2922

2923

2924

2925 while (true) {

2927 return false;

2929 if (Def->getParent() != LoopBB)

2930 return false;

2931

2932 if (Def->isCopy()) {

2933

2934 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())

2935 return false;

2936 CurReg = Def->getOperand(1).getReg();

2937 } else if (Def->isPHI()) {

2938

2939 if (Phi)

2940 return false;

2941 Phi = Def;

2943 } else if (TII->getIncrementValue(*Def, Value)) {

2944

2946

2947 return false;

2948

2951 bool OffsetIsScalable;

2952 if (TII->getMemOperandWithOffset(*Def, BaseOp, Offset, OffsetIsScalable,

2954

2955 CurReg = BaseOp->getReg();

2956 } else {

2957

2958

2961 return false;

2962 }

2964 } else {

2965 return false;

2966 }

2967 if (CurReg == OrgReg)

2968 break;

2969 }

2970

2972 return false;

2973

2974 return true;

2975}

2976

2977

2978

2979bool SwingSchedulerDAG::computeDelta(const MachineInstr &MI, int &Delta) const {

2980 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

2981 const MachineOperand *BaseOp;

2983 bool OffsetIsScalable;

2985 return false;

2986

2987

2988 if (OffsetIsScalable)

2989 return false;

2990

2991 if (!BaseOp->isReg())

2992 return false;

2993

2995}

2996

2997

2998

2999

3000

3001

3002

3003

3004bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,

3005 unsigned &BasePos,

3006 unsigned &OffsetPos,

3009

3011 return false;

3012 unsigned BasePosLd, OffsetPosLd;

3014 return false;

3016

3017

3018 MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();

3019 MachineInstr *Phi = MRI.getVRegDef(BaseReg);

3020 if (!Phi || Phi->isPHI())

3021 return false;

3022

3024 if (!PrevReg)

3025 return false;

3026

3027

3028 MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);

3029 if (!PrevDef || PrevDef == MI)

3030 return false;

3031

3033 return false;

3034

3035 unsigned BasePos1 = 0, OffsetPos1 = 0;

3037 return false;

3038

3039

3040

3041 int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();

3042 int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();

3043 MachineInstr *NewMI = MF.CloneMachineInstr(MI);

3044 NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);

3046 MF.deleteMachineInstr(NewMI);

3047 if (!Disjoint)

3048 return false;

3049

3050

3051 BasePos = BasePosLd;

3052 OffsetPos = OffsetPosLd;

3053 NewBase = PrevReg;

3054 Offset = StoreOffset;

3055 return true;

3056}

3057

3058

3059

3064 InstrChanges.find(SU);

3065 if (It != InstrChanges.end()) {

3066 std::pair<Register, int64_t> RegAndOffset = It->second;

3067 unsigned BasePos, OffsetPos;

3068 if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))

3069 return;

3070 Register BaseReg = MI->getOperand(BasePos).getReg();

3071 MachineInstr *LoopDef = findDefInLoop(BaseReg);

3076 if (BaseStageNum < DefStageNum) {

3078 int OffsetDiff = DefStageNum - BaseStageNum;

3079 if (DefCycleNum < BaseCycleNum) {

3081 if (OffsetDiff > 0)

3082 --OffsetDiff;

3083 }

3084 int64_t NewOffset =

3085 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;

3089 NewMIs[MI] = NewMI;

3090 }

3091 }

3092}

3093

3094

3095

3096

3100 while (Def->isPHI()) {

3101 if (!Visited.insert(Def).second)

3102 break;

3103 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)

3104 if (Def->getOperand(i + 1).getMBB() == BB) {

3105 Def = MRI.getVRegDef(Def->getOperand(i).getReg());

3106 break;

3107 }

3108 }

3109 return Def;

3110}

3111

3112

3113

3116 int DeltaB, DeltaO, Delta;

3118 DeltaB != DeltaO)

3119 return true;

3120 Delta = DeltaB;

3121

3123 int64_t OffsetB, OffsetO;

3124 bool OffsetBIsScalable, OffsetOIsScalable;

3126 if (TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,

3127 OffsetBIsScalable, TRI) ||

3128 TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,

3129 OffsetOIsScalable, TRI))

3130 return true;

3131

3132 if (OffsetBIsScalable || OffsetOIsScalable)

3133 return true;

3134

3136

3137

3138

3139 if (!BaseOpB->isReg() || !BaseOpO->isReg())

3140 return true;

3142 if (!RegB.isVirtual() || !RegO.isVirtual())

3143 return true;

3144

3147 if (!DefB || !DefO || !DefB->isPHI() || !DefO->isPHI())

3148 return true;

3149

3158

3160 return true;

3161 }

3162

3165

3166

3167

3169 return true;

3170

3172 dbgs() << "Overlap check:\n";

3173 dbgs() << " BaseMI: ";

3174 BaseMI->dump();

3175 dbgs() << " Base + " << OffsetB << " + I * " << Delta

3176 << ", Len: " << AccessSizeB.getValue() << "\n";

3177 dbgs() << " OtherMI: ";

3178 OtherMI->dump();

3179 dbgs() << " Base + " << OffsetO << " + I * " << Delta

3180 << ", Len: " << AccessSizeO.getValue() << "\n";

3181 });

3182

3183

3184

3185

3186

3187 if (Delta < 0) {

3188 int64_t BaseMinAddr = OffsetB;

3189 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue() - 1;

3190 if (BaseMinAddr > OhterNextIterMaxAddr) {

3192 return false;

3193 }

3194 } else {

3195 int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue() - 1;

3196 int64_t OtherNextIterMinAddr = OffsetO + Delta;

3197 if (BaseMaxAddr < OtherNextIterMinAddr) {

3199 return false;

3200 }

3201 }

3203 return true;

3204}

3205

3206

3207

3208

3211 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||

3212 Edge.getDst()->isBoundaryNode())

3213 return false;

3214

3216 return true;

3217

3218 if (Edge.isOutputDep())

3219 return true;

3220

3222 MachineInstr *DI = Edge.getDst()->getInstr();

3223 assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");

3224

3225

3229 return true;

3230

3232 return false;

3233

3234

3235

3236

3238}

3239

3240void SwingSchedulerDAG::postProcessDAG() {

3241 for (auto &M : Mutations)

3242 M->apply(this);

3243}

3244

3245

3246

3247

3248

3249

3251 bool forward = true;

3253 dbgs() << "Trying to insert node between " << StartCycle << " and "

3254 << EndCycle << " II: " << II << "\n";

3255 });

3256 if (StartCycle > EndCycle)

3257 forward = false;

3258

3259

3260 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;

3261 for (int curCycle = StartCycle; curCycle != termCycle;

3262 forward ? ++curCycle : --curCycle) {

3263

3264 if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||

3265 ProcItinResources.canReserveResources(*SU, curCycle)) {

3267 dbgs() << "\tinsert at cycle " << curCycle << " ";

3269 });

3270

3271 if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))

3272 ProcItinResources.reserveResources(*SU, curCycle);

3273 ScheduledInstrs[curCycle].push_back(SU);

3274 InstrToCycle.insert(std::make_pair(SU, curCycle));

3275 if (curCycle > LastCycle)

3276 LastCycle = curCycle;

3277 if (curCycle < FirstCycle)

3278 FirstCycle = curCycle;

3279 return true;

3280 }

3282 dbgs() << "\tfailed to insert at cycle " << curCycle << " ";

3284 });

3285 }

3286 return false;

3287}

3288

3289

3295 int EarlyCycle = INT_MAX;

3296 while (!Worklist.empty()) {

3299 if (Visited.count(PrevSU))

3300 continue;

3301 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);

3302 if (it == InstrToCycle.end())

3303 continue;

3304 EarlyCycle = std::min(EarlyCycle, it->second);

3305 for (const auto &IE : DDG->getInEdges(PrevSU))

3306 if (IE.isOrderDep() || IE.isOutputDep())

3308 Visited.insert(PrevSU);

3309 }

3310 return EarlyCycle;

3311}

3312

3313

3319 int LateCycle = INT_MIN;

3320 while (!Worklist.empty()) {

3324 continue;

3325 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);

3326 if (it == InstrToCycle.end())

3327 continue;

3328 LateCycle = std::max(LateCycle, it->second);

3329 for (const auto &OE : DDG->getOutEdges(SuccSU))

3330 if (OE.isOrderDep() || OE.isOutputDep())

3332 Visited.insert(SuccSU);

3333 }

3334 return LateCycle;

3335}

3336

3337

3338

3339

3341 for (auto &P : SU->Preds)

3342 if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI())

3343 for (auto &S : P.getSUnit()->Succs)

3344 if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())

3345 return P.getSUnit();

3346 return nullptr;

3347}

3348

3349

3350

3354

3355

3356

3357

3358 for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {

3360 for (const auto &IE : DDG->getInEdges(SU)) {

3361 if (IE.getSrc() == I) {

3362

3363

3366 *MinLateStart = std::min(*MinLateStart, End);

3367 }

3368 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II;

3369 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);

3370 }

3371 }

3372

3373 for (const auto &OE : DDG->getOutEdges(SU)) {

3374 if (OE.getDst() == I) {

3375

3376

3379 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);

3380 }

3381 int LateStart = cycle - OE.getLatency() + OE.getDistance() * II;

3382 *MinLateStart = std::min(*MinLateStart, LateStart);

3383 }

3384 }

3385

3387 for (const auto &Dep : SU->Preds) {

3388

3389

3390 if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&

3392 *MinLateStart = std::min(*MinLateStart, cycle);

3393 }

3394 }

3395 }

3396}

3397

3398

3399

3400

3402 std::deque<SUnit *> &Insts) const {

3404 bool OrderBeforeUse = false;

3405 bool OrderAfterDef = false;

3406 bool OrderBeforeDef = false;

3407 unsigned MoveDef = 0;

3408 unsigned MoveUse = 0;

3411

3412 unsigned Pos = 0;

3413 for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;

3414 ++I, ++Pos) {

3416 if (!MO.isReg() || !MO.getReg().isVirtual())

3417 continue;

3418

3420 unsigned BasePos, OffsetPos;

3421 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))

3422 if (MI->getOperand(BasePos).getReg() == Reg)

3424 Reg = NewReg;

3425 bool Reads, Writes;

3426 std::tie(Reads, Writes) =

3427 (*I)->getInstr()->readsWritesVirtualRegister(Reg);

3428 if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {

3429 OrderBeforeUse = true;

3430 if (MoveUse == 0)

3431 MoveUse = Pos;

3432 } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {

3433

3434 OrderAfterDef = true;

3435 MoveDef = Pos;

3436 } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {

3438 OrderBeforeUse = true;

3439 if (MoveUse == 0)

3440 MoveUse = Pos;

3441 } else {

3442 OrderAfterDef = true;

3443 MoveDef = Pos;

3444 }

3445 } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {

3446 OrderBeforeUse = true;

3447 if (MoveUse == 0)

3448 MoveUse = Pos;

3449 if (MoveUse != 0) {

3450 OrderAfterDef = true;

3451 MoveDef = Pos - 1;

3452 }

3453 } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {

3454

3455 OrderBeforeUse = true;

3456 if (MoveUse == 0)

3457 MoveUse = Pos;

3458 } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&

3460 if (MoveUse == 0) {

3461 OrderBeforeDef = true;

3462 MoveUse = Pos;

3463 }

3464 }

3465 }

3466

3467

3469 if (OE.getDst() != *I)

3470 continue;

3471 if (OE.isOrderDep() && stageScheduled(*I) == StageInst1) {

3472 OrderBeforeUse = true;

3473 if (Pos < MoveUse)

3474 MoveUse = Pos;

3475 }

3476

3477

3478

3479 else if ((OE.isAntiDep() || OE.isOutputDep()) &&

3481 OrderBeforeUse = true;

3482 if ((MoveUse == 0) || (Pos < MoveUse))

3483 MoveUse = Pos;

3484 }

3485 }

3486 for (auto &IE : DDG->getInEdges(SU)) {

3487 if (IE.getSrc() != *I)

3488 continue;

3489 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&

3491 OrderAfterDef = true;

3492 MoveDef = Pos;

3493 }

3494 }

3495 }

3496

3497

3498 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)

3499 OrderBeforeUse = false;

3500

3501

3502

3503 if (OrderBeforeDef)

3504 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);

3505

3506

3507

3508 if (OrderBeforeUse && OrderAfterDef) {

3509 SUnit *UseSU = Insts.at(MoveUse);

3510 SUnit *DefSU = Insts.at(MoveDef);

3511 if (MoveUse > MoveDef) {

3512 Insts.erase(Insts.begin() + MoveUse);

3513 Insts.erase(Insts.begin() + MoveDef);

3514 } else {

3515 Insts.erase(Insts.begin() + MoveDef);

3516 Insts.erase(Insts.begin() + MoveUse);

3517 }

3521 return;

3522 }

3523

3524

3525 if (OrderBeforeUse)

3526 Insts.push_front(SU);

3527 else

3528 Insts.push_back(SU);

3529}

3530

3531

3534 if (!Phi.isPHI())

3535 return false;

3536 assert(Phi.isPHI() && "Expecting a Phi.");

3540

3543 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);

3544 SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));

3545 if (!UseSU)

3546 return true;

3548 return true;

3551 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);

3552}

3553

3554

3555

3556

3557

3558

3559

3560

3564 if (!MO.isReg())

3565 return false;

3566 if (Def->isPHI())

3567 return false;

3569 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())

3570 return false;

3572 return false;

3575 if (DMO.getReg() == LoopReg)

3576 return true;

3577 }

3578 return false;

3579}

3580

3581

3582

3585 for (const auto &IE : DDG->getInEdges(SU))

3586 if (InstrToCycle.count(IE.getSrc()))

3587 return false;

3588 return true;

3589}

3590

3591

3596

3597 for (auto &SU : SSD->SUnits)

3600

3602 while (!Worklist.empty()) {

3604 if (DoNotPipeline.count(SU))

3605 continue;

3607 DoNotPipeline.insert(SU);

3608 for (const auto &IE : DDG->getInEdges(SU))

3609 Worklist.push_back(IE.getSrc());

3610

3611

3612

3613 for (const auto &OE : DDG->getOutEdges(SU))

3614 if (OE.getDistance() == 1)

3615 Worklist.push_back(OE.getDst());

3616 }

3617 return DoNotPipeline;

3618}

3619

3620

3621

3625

3626 int NewLastCycle = INT_MIN;

3629 continue;

3631 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);

3632 continue;

3633 }

3634

3635

3638 if (IE.getDistance() == 0)

3639 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);

3640

3641

3642

3644 if (OE.getDistance() == 1)

3645 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);

3646

3647 int OldCycle = InstrToCycle[&SU];

3648 if (OldCycle != NewCycle) {

3649 InstrToCycle[&SU] = NewCycle;

3654 << ") is not pipelined; moving from cycle " << OldCycle

3655 << " to " << NewCycle << " Instr:" << *SU.getInstr());

3656 }

3657

3658

3659

3660

3661

3662

3663

3664

3665

3666

3667

3668

3669

3670

3671

3672

3673

3674

3675

3676

3677

3678

3679

3680 if (FirstCycle + InitiationInterval <= NewCycle)

3681 return false;

3682

3683 NewLastCycle = std::max(NewLastCycle, NewCycle);

3684 }

3685 LastCycle = NewLastCycle;

3686 return true;

3687}

3688

3689

3690

3691

3692

3693

3694

3695

3696

3700 continue;

3702 int CycleDef = InstrToCycle[&SU];

3703 assert(StageDef != -1 && "Instruction should have been scheduled.");

3705 SUnit *Dst = OE.getDst();

3706 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())

3707 if (OE.getReg().isPhysical()) {

3709 return false;

3710 if (InstrToCycle[Dst] <= CycleDef)

3711 return false;

3712 }

3713 }

3714 }

3715 return true;

3716}

3717

3718

3719

3720

3721

3722

3723

3724

3725

3726

3727

3728void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {

3729

3730

3731 typedef std::pair<SUnit *, unsigned> UnitIndex;

3732 std::vector Indices(NodeOrder.size(), std::make_pair(nullptr, 0));

3733

3734 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)

3735 Indices.push_back(std::make_pair(NodeOrder[i], i));

3736

3737 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {

3738 return std::get<0>(i1) < std::get<0>(i2);

3739 };

3740

3741

3743

3744 bool Valid = true;

3745 (void)Valid;

3746

3747

3748

3749

3750

3751 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {

3753 unsigned Index = i;

3754

3755 bool PredBefore = false;

3756 bool SuccBefore = false;

3757

3760 (void)Succ;

3761 (void)Pred;

3762

3763 for (const auto &IE : DDG->getInEdges(SU)) {

3764 SUnit *PredSU = IE.getSrc();

3765 unsigned PredIndex = std::get<1>(

3766 *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));

3767 if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {

3768 PredBefore = true;

3769 Pred = PredSU;

3770 break;

3771 }

3772 }

3773

3774 for (const auto &OE : DDG->getOutEdges(SU)) {

3775 SUnit *SuccSU = OE.getDst();

3776

3777

3778

3780 continue;

3781 unsigned SuccIndex = std::get<1>(

3782 *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));

3783 if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {

3784 SuccBefore = true;

3785 Succ = SuccSU;

3786 break;

3787 }

3788 }

3789

3790 if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {

3791

3792

3794 Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });

3795 if (InCircuit)

3797 else {

3799 NumNodeOrderIssues++;

3801 }

3803 << " are scheduled before node " << SU->NodeNum

3804 << "\n");

3805 }

3806 }

3807

3809 if (!Valid)

3810 dbgs() << "Invalid node order found!\n";

3811 });

3812}

3813

3814

3815

3816

3817

3818

3819

3823 for (SUnit *SU : Instrs) {

3825 for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {

3827

3828

3829 if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {

3830

3831

3833 InstrChanges.find(SU);

3834 if (It != InstrChanges.end()) {

3835 unsigned BasePos, OffsetPos;

3836

3837 if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {

3840 int64_t NewOffset =

3841 MI->getOperand(OffsetPos).getImm() - It->second.second;

3845 NewMIs[MI] = NewMI;

3846 }

3847 }

3850 break;

3851 }

3852

3853

3854 unsigned TiedUseIdx = 0;

3855 if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {

3856

3857 OverlapReg = MI->getOperand(TiedUseIdx).getReg();

3858

3859 NewBaseReg = MI->getOperand(i).getReg();

3860 break;

3861 }

3862 }

3863 }

3864}

3865

3866std::deque<SUnit *>

3868 const std::deque<SUnit *> &Instrs) const {

3869 std::deque<SUnit *> NewOrderPhi;

3870 for (SUnit *SU : Instrs) {

3872 NewOrderPhi.push_back(SU);

3873 }

3874 std::deque<SUnit *> NewOrderI;

3875 for (SUnit *SU : Instrs) {

3878 }

3880 return NewOrderPhi;

3881}

3882

3883

3884

3885

3887

3889 for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;

3890 ++stage) {

3891 std::deque<SUnit *> &cycleInstrs =

3892 ScheduledInstrs[cycle + (stage * InitiationInterval)];

3894 ScheduledInstrs[cycle].push_front(SU);

3895 }

3896 }

3897

3898

3899

3900 for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)

3901 ScheduledInstrs.erase(cycle);

3902

3903

3904

3907

3908

3909

3911 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];

3914 }

3915

3917}

3918

3920 os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV

3921 << " depth " << MaxDepth << " col " << Colocate << "\n";

3922 for (const auto &I : Nodes)

3923 os << " SU(" << I->NodeNum << ") " << *(I->getInstr());

3924 os << "\n";

3925}

3926

3927#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

3928

3930

3932

3934 for (SUnit *CI : cycleInstrs->second) {

3935 os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";

3936 os << "(" << CI->NodeNum << ") ";

3938 os << "\n";

3939 }

3940 }

3941}

3942

3943

3946

3947void ResourceManager::dumpMRT() const {

3949 if (UseDFA)

3950 return;

3951 std::stringstream SS;

3952 SS << "MRT:\n";

3953 SS << std::setw(4) << "Slot";

3954 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)

3955 SS << std::setw(3) << I;

3956 SS << std::setw(7) << "#Mops"

3957 << "\n";

3958 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {

3959 SS << std::setw(4) << Slot;

3960 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)

3961 SS << std::setw(3) << MRT[Slot][I];

3962 SS << std::setw(7) << NumScheduledMops[Slot] << "\n";

3963 }

3964 dbgs() << SS.str();

3965 });

3966}

3967#endif

3968

3971 unsigned ProcResourceID = 0;

3972

3973

3974

3975 assert(SM.getNumProcResourceKinds() < 64 &&

3976 "Too many kinds of resources, unsupported");

3977

3978

3979 Masks.resize(SM.getNumProcResourceKinds());

3980 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {

3982 if (Desc.SubUnitsIdxBegin)

3983 continue;

3984 Masks[I] = 1ULL << ProcResourceID;

3985 ProcResourceID++;

3986 }

3987

3988 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {

3990 if (Desc.SubUnitsIdxBegin)

3991 continue;

3992 Masks[I] = 1ULL << ProcResourceID;

3993 for (unsigned U = 0; U < Desc.NumUnits; ++U)

3994 Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];

3995 ProcResourceID++;

3996 }

3999 dbgs() << "ProcResourceDesc:\n";

4000 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {

4002 dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",

4003 ProcResource->Name, I, Masks[I],

4005 }

4006 dbgs() << " -----------------\n";

4007 }

4008 });

4009}

4010

4014 dbgs() << "canReserveResources:\n";

4015 });

4016 if (UseDFA)

4017 return DFAResources[positiveModulo(Cycle, InitiationInterval)]

4019

4021 if (!SCDesc->isValid()) {

4023 dbgs() << "No valid Schedule Class Desc for schedClass!\n";

4025 });

4026 return true;

4027 }

4028

4029 reserveResources(SCDesc, Cycle);

4030 bool Result = !isOverbooked();

4031 unreserveResources(SCDesc, Cycle);

4032

4034 return Result;

4035}

4036

4037void ResourceManager::reserveResources(SUnit &SU, int Cycle) {

4040 dbgs() << "reserveResources:\n";

4041 });

4042 if (UseDFA)

4043 return DFAResources[positiveModulo(Cycle, InitiationInterval)]

4045

4047 if (!SCDesc->isValid()) {

4049 dbgs() << "No valid Schedule Class Desc for schedClass!\n";

4051 });

4052 return;

4053 }

4054

4055 reserveResources(SCDesc, Cycle);

4056

4059 dumpMRT();

4060 dbgs() << "reserveResources: done!\n\n";

4061 }

4062 });

4063}

4064

4065void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,

4070 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)

4071 ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];

4072

4074 ++NumScheduledMops[positiveModulo(C, InitiationInterval)];

4075}

4076

4077void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,

4082 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)

4083 --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];

4084

4086 --NumScheduledMops[positiveModulo(C, InitiationInterval)];

4087}

4088

4089bool ResourceManager::isOverbooked() const {

4091 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {

4092 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {

4093 const MCProcResourceDesc *Desc = SM.getProcResource(I);

4094 if (MRT[Slot][I] > Desc->NumUnits)

4095 return true;

4096 }

4097 if (NumScheduledMops[Slot] > IssueWidth)

4098 return true;

4099 }

4100 return false;

4101}

4102

4103int ResourceManager::calculateResMIIDFA() const {

4105

4106

4107

4108 FuncUnitSorter FUS = FuncUnitSorter(*ST);

4109 for (SUnit &SU : DAG->SUnits)

4110 FUS.calcCriticalResources(*SU.getInstr());

4111 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>

4112 FuncUnitOrder(FUS);

4113

4114 for (SUnit &SU : DAG->SUnits)

4115 FuncUnitOrder.push(SU.getInstr());

4116

4120

4121 while (!FuncUnitOrder.empty()) {

4122 MachineInstr *MI = FuncUnitOrder.top();

4123 FuncUnitOrder.pop();

4125 continue;

4126

4127

4128

4130 unsigned ReservedCycles = 0;

4131 auto *RI = Resources.begin();

4132 auto *RE = Resources.end();

4134 dbgs() << "Trying to reserve resource for " << NumCycles

4135 << " cycles for \n";

4137 });

4138 for (unsigned C = 0; C < NumCycles; ++C)

4139 while (RI != RE) {

4140 if ((*RI)->canReserveResources(*MI)) {

4141 (*RI)->reserveResources(*MI);

4142 ++ReservedCycles;

4143 break;

4144 }

4145 RI++;

4146 }

4147 LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles

4148 << ", NumCycles:" << NumCycles << "\n");

4149

4150 for (unsigned C = ReservedCycles; C < NumCycles; ++C) {

4152 << "NewResource created to reserve resources"

4153 << "\n");

4155 assert(NewResource->canReserveResources(*MI) && "Reserve error.");

4156 NewResource->reserveResources(*MI);

4157 Resources.push_back(std::unique_ptr(NewResource));

4158 }

4159 }

4160

4161 int Resmii = Resources.size();

4162 LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");

4163 return Resmii;

4164}

4165

4167 if (UseDFA)

4168 return calculateResMIIDFA();

4169

4170

4171

4172

4173 int NumMops = 0;

4175 for (SUnit &SU : DAG->SUnits) {

4177 continue;

4178

4181 continue;

4182

4185 DAG->dumpNode(SU);

4187 << " WriteProcRes: ";

4188 }

4189 });

4192 make_range(STI->getWriteProcResBegin(SCDesc),

4193 STI->getWriteProcResEnd(SCDesc))) {

4197 SM.getProcResource(PRE.ProcResourceIdx);

4198 dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";

4199 }

4200 });

4201 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;

4202 }

4204 }

4205

4206 int Result = (NumMops + IssueWidth - 1) / IssueWidth;

4209 dbgs() << "#Mops: " << NumMops << ", "

4210 << "IssueWidth: " << IssueWidth << ", "

4211 << "Cycles: " << Result << "\n";

4212 });

4213

4216 std::stringstream SS;

4217 SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)

4218 << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"

4219 << "\n";

4220 dbgs() << SS.str();

4221 }

4222 });

4223 for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {

4225 int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;

4228 std::stringstream SS;

4229 SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)

4230 << Desc->NumUnits << std::setw(10) << ResourceCount[I]

4231 << std::setw(10) << Cycles << "\n";

4232 dbgs() << SS.str();

4233 }

4234 });

4235 if (Cycles > Result)

4236 Result = Cycles;

4237 }

4238 return Result;

4239}

4240

4242 InitiationInterval = II;

4243 DFAResources.clear();

4244 DFAResources.resize(II);

4245 for (auto &I : DFAResources)

4246 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));

4247 MRT.clear();

4249 NumScheduledMops.clear();

4250 NumScheduledMops.resize(II);

4251}

4252

4254 if (Pred.isArtificial() || Dst->isBoundaryNode())

4255 return true;

4256

4257

4258

4259 return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0);

4260}

4261

4262SwingSchedulerDDG::SwingSchedulerDDGEdges &

4263SwingSchedulerDDG::getEdges(const SUnit *SU) {

4264 if (SU == EntrySU)

4265 return EntrySUEdges;

4266 if (SU == ExitSU)

4267 return ExitSUEdges;

4268 return EdgesVec[SU->NodeNum];

4269}

4270

4271const SwingSchedulerDDG::SwingSchedulerDDGEdges &

4272SwingSchedulerDDG::getEdges(const SUnit *SU) const {

4273 if (SU == EntrySU)

4274 return EntrySUEdges;

4275 if (SU == ExitSU)

4276 return ExitSUEdges;

4277 return EdgesVec[SU->NodeNum];

4278}

4279

4280void SwingSchedulerDDG::addEdge(const SUnit *SU,

4281 const SwingSchedulerDDGEdge &Edge) {

4283 "Validation-only edges are not expected here.");

4284 auto &Edges = getEdges(SU);

4285 if (Edge.getSrc() == SU)

4286 Edges.Succs.push_back(Edge);

4287 else

4288 Edges.Preds.push_back(Edge);

4289}

4290

4291void SwingSchedulerDDG::initEdges(SUnit *SU) {

4292 for (const auto &PI : SU->Preds) {

4293 SwingSchedulerDDGEdge Edge(SU, PI, false,

4294 false);

4296 }

4297

4298 for (const auto &SI : SU->Succs) {

4299 SwingSchedulerDDGEdge Edge(SU, SI, true,

4300 false);

4302 }

4303}

4304

4307 : EntrySU(EntrySU), ExitSU(ExitSU) {

4308 EdgesVec.resize(SUnits.size());

4309

4310

4311 initEdges(EntrySU);

4312 initEdges(ExitSU);

4313 for (auto &SU : SUnits)

4314 initEdges(&SU);

4315

4316

4317 for (SUnit &SU : SUnits) {

4318 SUnit *Src = &SU;

4321 Base.setLatency(1);

4322 for (SUnit *Dst : *OD) {

4324 true);

4325 Edge.setDistance(1);

4326 ValidationOnlyEdges.push_back(Edge);

4327 }

4328 }

4329 }

4330}

4331

4332const SwingSchedulerDDG::EdgesType &

4334 return getEdges(SU).Preds;

4335}

4336

4337const SwingSchedulerDDG::EdgesType &

4339 return getEdges(SU).Succs;

4340}

4341

4342

4345

4346 auto ExpandCycle = [&](SUnit *SU) {

4349 return Cycle + (Stage * II);

4350 };

4351

4353 SUnit *Src = Edge.getSrc();

4354 SUnit *Dst = Edge.getDst();

4355 if (!Src->isInstr() || !Dst->isInstr())

4356 continue;

4357 int CycleSrc = ExpandCycle(Src);

4358 int CycleDst = ExpandCycle(Dst);

4359 int MaxLateStart = CycleDst + Edge.getDistance() * II - Edge.getLatency();

4360 if (CycleSrc > MaxLateStart) {

4362 dbgs() << "Validation failed for edge from " << Src->NodeNum << " to "

4363 << Dst->NodeNum << "\n";

4364 });

4365 return false;

4366 }

4367 }

4368 return true;

4369}

4370

4373 for (SUnit &SU : SUnits) {

4374 SUnit *Src = &SU;

4379 SUnit *From = Src;

4380 SUnit *To = Dst;

4383

4384

4385

4386

4387

4388

4389

4390

4391

4392

4393

4394

4395

4396

4397

4398

4402 TII->isGlobalMemoryObject(FromMI) &&

4403 TII->isGlobalMemoryObject(ToMI) && isSuccOrder(From, To)) {

4404 SDep Pred = Dep;

4407 }

4408 }

4409 }

4410 }

4411}

4412

4416

4417 if (!Order)

4418 return;

4419

4420 const auto DumpSU = [](const SUnit *SU) {

4421 std::ostringstream OSS;

4422 OSS << "SU(" << SU->NodeNum << ")";

4423 return OSS.str();

4424 };

4425

4426 dbgs() << " Loop carried edges from " << DumpSU(SU) << "\n"

4427 << " Order\n";

4428 for (SUnit *Dst : *Order)

4429 dbgs() << " " << DumpSU(Dst) << "\n";

4430}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

MachineInstrBuilder MachineInstrBuilder & DefMI

static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

This file contains the simple types necessary to represent the attributes associated with functions a...

This file implements the BitVector class.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

This file defines the DenseMap class.

A common definition of LaneBitmask for use in TableGen and CodeGen.

static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)

print mir2vec MIR2Vec Vocabulary Printer Pass

static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))

A command line argument to force pipeliner to use specified initial interval.

static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))

static bool hasPHICycleDFS(unsigned Reg, const DenseMap< unsigned, SmallVector< unsigned, 2 > > &PhiDeps, SmallSet< unsigned, 8 > &Visited, SmallSet< unsigned, 8 > &RecStack)

Depth-first search to detect cycles among PHI dependencies.

Definition MachinePipeliner.cpp:490

static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))

static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))

static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)

Return the register values for the operands of a Phi instruction.

Definition MachinePipeliner.cpp:963

static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))

static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)

Compute the live-out registers for the instructions in a node-set.

Definition MachinePipeliner.cpp:2331

static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)

Create an instruction stream that represents a single iteration and stage of each instruction.

Definition MachinePipeliner.cpp:1405

static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))

static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)

Return the Phi register value that comes the loop block.

Definition MachinePipeliner.cpp:979

static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)

Return true if Set1 contains elements in Set2.

Definition MachinePipeliner.cpp:2541

static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)

When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...

Definition MachinePipeliner.cpp:2900

static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))

static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))

static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))

A command line option to disable the pruning of loop carried order dependences.

static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200))

A command line argument to limit the number of store instructions in the target basic block.

static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))

A command line argument to limit minimum initial interval for pipelining.

static bool isSuccOrder(SUnit *SUa, SUnit *SUb)

Return true if SUb can be reached from SUa following the chain edges.

Definition MachinePipeliner.cpp:988

static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))

A command line argument to limit the number of stages in the pipeline.

static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))

A command line option to enable SWP at -Os.

static bool hasPHICycle(const MachineBasicBlock *LoopHeader, const MachineRegisterInfo &MRI)

Definition MachinePipeliner.cpp:516

static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))

A command line argument to set the window scheduling option.

static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)

Compute the Pred_L(O) set, as defined in the paper.

Definition MachinePipeliner.cpp:2230

static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD, bool PerformCheapCheck)

Returns true if there is a loop-carried order dependency from Src to Dst.

Definition MachinePipeliner.cpp:1055

static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))

static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))

static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)

Return true if there is a path from the specified node to any of the nodes in DestNodes.

Definition MachinePipeliner.cpp:2301

static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)

Compute the Succ_L(O) set, as defined in the paper.

Definition MachinePipeliner.cpp:2266

static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))

static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))

A command line option to turn software pipelining on or off.

static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))

A command line option to disable the pruning of chain dependences due to an unrelated Phi.

static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)

If an instruction has a use that spans multiple iterations, then return true.

Definition MachinePipeliner.cpp:3340

static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)

Definition MachinePipeliner.cpp:2882

Register const TargetRegisterInfo * TRI

Promote Memory to Register

This file provides utility analysis objects describing memory locations.

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the PriorityQueue class.

Remove Loads Into Fake Uses

std::pair< BasicBlock *, BasicBlock * > Edge

This file defines generic set operations that may be used on set's of different types,...

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

Target-Independent Code Generator Pass Configuration Options pass.

Add loop-carried chain dependencies.

Definition MachinePipeliner.cpp:276

void computeDependencies()

The main function to compute loop-carried order-dependencies.

Definition MachinePipeliner.cpp:1130

const BitVector & getLoopCarried(unsigned Idx) const

Definition MachinePipeliner.cpp:335

LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)

Definition MachinePipeliner.cpp:1124

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...

bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool erase(const KeyT &Val)

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

bool skipFunction(const Function &F) const

Optional passes call this function to check whether the pass should be skipped.

AttributeList getAttributes() const

Return the attribute list for this Function.

const InstrStage * beginStage(unsigned ItinClassIndx) const

Return the first stage of the itinerary.

const InstrStage * endStage(unsigned ItinClassIndx) const

Return the last+1 stage of the itinerary.

bool isEmpty() const

Returns true if there are no itineraries.

MDNode * getMetadata(unsigned KindID) const

Get the metadata of given kind attached to this Instruction.

TypeSize getValue() const

Represents a single loop in the control flow graph.

unsigned getSchedClass() const

Return the scheduling class for this instruction.

const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const

const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const

Return an iterator at the first process resource consumed by the given scheduling class.

const MCSchedModel & getSchedModel() const

Get the machine model for this subtarget's CPU.

const MDOperand & getOperand(unsigned I) const

ArrayRef< MDOperand > operands() const

unsigned getNumOperands() const

Return number of MDNode operands.

LLVM_ABI StringRef getString() const

MachineInstrBundleIterator< const MachineInstr > const_iterator

iterator_range< iterator > phis()

Returns a range that iterates over the phis in the basic block.

const BasicBlock * getBasicBlock() const

Return the LLVM basic block that this instance corresponded to originally.

LLVM_ABI iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)

Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.

instr_iterator instr_end()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

MachineInstrBundleIterator< MachineInstr > iterator

Analysis pass which computes a MachineDominatorTree.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

Representation of each machine instruction.

bool mayRaiseFPException() const

Return true if this instruction could possibly raise a floating-point exception.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

bool mayLoadOrStore(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly read or modify memory.

const MachineBasicBlock * getParent() const

filtered_mop_range all_defs()

Returns an iterator range over all operands that are (explicit or implicit) register defs.

bool mayLoad(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly read memory.

const MCInstrDesc & getDesc() const

Returns the target instruction descriptor of this MachineInstr.

LLVM_ABI bool hasUnmodeledSideEffects() const

Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...

bool isRegSequence() const

mmo_iterator memoperands_begin() const

Access to memory operands of the instruction.

LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const

Return true if this instruction is identical to Other.

LLVM_ABI bool hasOrderedMemoryRef() const

Return true if this instruction may have an ordered or volatile memory reference, or if the informati...

LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const

Print this MI to OS.

bool mayStore(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly modify memory.

bool isPseudo(QueryType Type=IgnoreBundle) const

Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.

LLVM_ABI void dump() const

const MachineOperand & getOperand(unsigned i) const

A description of a memory reference used in the backend.

AAMDNodes getAAInfo() const

Return the AA tags for the memory reference.

const Value * getValue() const

Return the base address of the memory access.

int64_t getOffset() const

For normal values, this is a byte offset added to the base address.

MachineOperand class - Representation of each machine instruction operand.

void setSubReg(unsigned subReg)

unsigned getSubReg() const

void setImm(int64_t immVal)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

LLVM_ABI void setReg(Register Reg)

Change the register this operand corresponds to.

Register getReg() const

getReg - Returns the register number.

LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const

Returns true if this operand is identical to the specified operand except for liveness related flags ...

The main class in the implementation of the target independent software pipeliner pass.

bool runOnMachineFunction(MachineFunction &MF) override

The "main" function for implementing Swing Modulo Scheduling.

Definition MachinePipeliner.cpp:363

const TargetInstrInfo * TII

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

Definition MachinePipeliner.cpp:698

const MachineDominatorTree * MDT

const MachineLoopInfo * MLI

MachineOptimizationRemarkEmitter * ORE

RegisterClassInfo RegClassInfo

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

defusechain_instr_iterator< true, false, false, true > use_instr_iterator

use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...

static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())

Return a location that may access any location before or after Ptr, while remaining within the underl...

Expand the kernel using modulo variable expansion algorithm (MVE).

static bool canApply(MachineLoop &L)

Check if ModuloScheduleExpanderMVE can be applied to L.

The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...

void cleanup()

Performs final cleanup after expansion.

void expand()

Performs the actual expansion.

Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...

void annotate()

Performs the annotation.

Represents a schedule for a single-block loop.

A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...

SUnit * getNode(unsigned i) const

void print(raw_ostream &os) const

Definition MachinePipeliner.cpp:3919

void setRecMII(unsigned mii)

unsigned count(SUnit *SU) const

void setColocate(unsigned c)

int compareRecMII(NodeSet &RHS)

LLVM_DUMP_METHOD void dump() const

Definition MachinePipeliner.cpp:3945

AnalysisType & getAnalysis() const

getAnalysis() - This function is used by subclasses to get to the analysis information ...

A reimplementation of ModuloScheduleExpander.

PointerIntPair - This class implements a pair of a pointer and small integer.

Track the current register pressure at some position in the instruction stream, and remember the high...

LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)

Force liveness of virtual registers or physical register units.

unsigned getRegPressureSetLimit(unsigned Idx) const

Get the register unit limit for the given pressure set index.

Wrapper class representing virtual and physical registers.

MCRegister asMCReg() const

Utility to check-convert this value to a MCRegister.

constexpr bool isValid() const

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

int calculateResMII() const

Definition MachinePipeliner.cpp:4166

void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)

Definition MachinePipeliner.cpp:3969

void init(int II)

Initialize resources with the initiation interval II.

Definition MachinePipeliner.cpp:4241

bool canReserveResources(SUnit &SU, int Cycle)

Check if the resources occupied by a machine instruction are available in the current state.

Definition MachinePipeliner.cpp:4011

Kind

These are the different kinds of scheduling dependencies.

@ Order

Any other ordering dependency.

@ Anti

A register anti-dependence (aka WAR).

@ Data

Regular data dependence (aka true-dependence).

void setLatency(unsigned Lat)

Sets the latency for this edge.

@ Barrier

An unknown scheduling barrier.

@ Artificial

Arbitrary strong DAG edge (no real dependence).

This class represents the scheduled code.

std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const

Definition MachinePipeliner.cpp:3867

void setInitiationInterval(int ii)

Set the initiation interval for this schedule.

void dump() const

Utility function used for debugging to print the schedule.

Definition MachinePipeliner.cpp:3944

bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)

Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...

Definition MachinePipeliner.cpp:3250

int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)

Return the cycle of the earliest scheduled instruction in the dependence chain.

Definition MachinePipeliner.cpp:3290

unsigned getMaxStageCount()

Return the maximum stage count needed for this schedule.

void print(raw_ostream &os) const

Print the schedule information to the given output.

Definition MachinePipeliner.cpp:3929

bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const

Return true if all scheduled predecessors are loop-carried output/order dependencies.

Definition MachinePipeliner.cpp:3583

int stageScheduled(SUnit *SU) const

Return the stage for a scheduled instruction.

void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const

Order the instructions within a cycle so that the definitions occur before the uses.

Definition MachinePipeliner.cpp:3401

bool isValidSchedule(SwingSchedulerDAG *SSD)

Definition MachinePipeliner.cpp:3697

int getInitiationInterval() const

Return the initiation interval for this schedule.

std::deque< SUnit * > & getInstructions(int cycle)

Return the instructions that are scheduled at the specified cycle.

int getFirstCycle() const

Return the first cycle in the completed schedule.

DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator

bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const

Return true if the instruction is a definition that is loop carried and defines the use on the next i...

Definition MachinePipeliner.cpp:3561

unsigned cycleScheduled(SUnit *SU) const

Return the cycle for a scheduled instruction.

SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)

Determine transitive dependences of unpipelineable instructions.

Definition MachinePipeliner.cpp:3592

void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)

Compute the scheduling start slot for the instruction.

Definition MachinePipeliner.cpp:3351

bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)

Definition MachinePipeliner.cpp:3622

bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const

Return true if the scheduled Phi has a loop carried operand.

Definition MachinePipeliner.cpp:3532

int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)

Return the cycle of the latest scheduled instruction in the dependence chain.

Definition MachinePipeliner.cpp:3314

int getFinalCycle() const

Return the last cycle in the finalized schedule.

void finalizeSchedule(SwingSchedulerDAG *SSD)

After the schedule has been formed, call this function to combine the instructions from the different...

Definition MachinePipeliner.cpp:3886

Scheduling unit. This is a node in the scheduling DAG.

bool isInstr() const

Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.

unsigned NodeNum

Entry # of node in the node vector.

void setInstr(MachineInstr *MI)

Assigns the instruction for the SUnit.

LLVM_ABI void removePred(const SDep &D)

Removes the specified edge as a pred of the current node if it exists.

bool isPred(const SUnit *N) const

Tests if node N is a predecessor of this node.

unsigned short Latency

Node latency.

bool isBoundaryNode() const

Boundary nodes are placeholders for the boundary of the scheduling region.

bool hasPhysRegDefs

Has physreg defs that are being used.

SmallVector< SDep, 4 > Succs

All sunit successors.

SmallVector< SDep, 4 > Preds

All sunit predecessors.

LLVM_ABI bool addPred(const SDep &D, bool Required=true)

Adds the specified edge as a pred of the current node if not already.

MachineInstr * getInstr() const

Returns the representative MachineInstr for this SUnit.

DenseMap< MachineInstr *, SUnit * > MISUnitMap

After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...

virtual void finishBlock()

Cleans up after scheduling in the given block.

MachineBasicBlock * BB

The block in which to insert instructions.

void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)

Builds SUnits for the current region.

SUnit * getSUnit(MachineInstr *MI) const

Returns an existing SUnit for this MI, or nullptr.

void dump() const override

LLVM_ABI void AddPred(SUnit *Y, SUnit *X)

Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.

LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)

Checks if SU is reachable from TargetSU.

MachineRegisterInfo & MRI

Virtual/real register map.

const TargetInstrInfo * TII

Target instruction information.

std::vector< SUnit > SUnits

The scheduling units.

const TargetRegisterInfo * TRI

Target processor register info.

SUnit EntrySU

Special node for the region entry.

MachineFunction & MF

Machine function.

SUnit ExitSU

Special node for the region exit.

A vector that has set insertion semantics.

size_type size() const

Determine the number of elements in the SetVector.

void insert_range(Range &&R)

size_type count(const_arg_type key) const

Count the number of elements of a given key in the SetVector.

typename vector_type::const_iterator iterator

bool contains(const_arg_type key) const

Check if the SetVector contains the given key.

void clear()

Completely clear the SetVector.

bool empty() const

Determine if the SetVector is empty or not.

bool insert(const value_type &X)

Insert a new element into the SetVector.

SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)

Insert the given machine instruction into the mapping.

bool erase(PtrType Ptr)

Remove pointer from the set.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...

void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)

Apply changes to the instruction if needed.

Definition MachinePipeliner.cpp:3060

const SwingSchedulerDDG * getDDG() const

void finishBlock() override

Clean up after the software pipeliner runs.

Definition MachinePipeliner.cpp:952

void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)

Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...

Definition MachinePipeliner.cpp:3820

bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const

Return true for an order or output dependence that is loop carried potentially.

Definition MachinePipeliner.cpp:3209

void schedule() override

We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...

Definition MachinePipeliner.cpp:762

bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const

Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...

Definition MachinePipeliner.cpp:3114

Register getInstrBaseReg(SUnit *SU) const

Return the new base register that was stored away for the changed instruction.

Represents a dependence between two instruction.

SUnit * getDst() const

Returns the SUnit to which the edge points (destination node).

bool ignoreDependence(bool IgnoreAnti) const

Returns true for DDG nodes that we ignore when computing the cost functions.

Definition MachinePipeliner.cpp:4253

SUnit * getSrc() const

Returns the SUnit from which the edge comes (source node).

This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...

SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)

Definition MachinePipeliner.cpp:4305

const EdgesType & getInEdges(const SUnit *SU) const

Definition MachinePipeliner.cpp:4333

bool isValidSchedule(const SMSchedule &Schedule) const

Check if Schedule doesn't violate the validation-only dependencies.

Definition MachinePipeliner.cpp:4343

const EdgesType & getOutEdges(const SUnit *SU) const

Definition MachinePipeliner.cpp:4338

Object returned by analyzeLoopForPipelining.

virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0

Return true if the given instruction should not be pipelined and should be ignored.

TargetInstrInfo - Interface to description of machine instruction set.

bool isZeroCost(unsigned Opcode) const

Return true for pseudo instructions that don't consume any machine resources in their current form.

virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const

Create machine specific model for scheduling.

virtual bool isPostIncrement(const MachineInstr &MI) const

Return true for post-incremented instructions.

virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const

Return true if the instruction contains a base register and offset.

virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const

Sometimes, it is possible for the target to tell, even without aliasing information,...

bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const

Get the base operand and byte offset of an instruction that reads/writes memory.

Primary interface to the complete machine description for the target machine.

Target-Independent Code Generator Pass Configuration Options.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

virtual bool enableMachinePipeliner() const

True if the subtarget should run MachinePipeliner.

virtual bool useDFAforSMS() const

Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...

virtual const TargetInstrInfo * getInstrInfo() const

virtual const TargetRegisterInfo * getRegisterInfo() const =0

Return the target's register information.

virtual const InstrItineraryData * getInstrItineraryData() const

getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

Wrapper class representing a virtual register or register unit.

constexpr bool isVirtualReg() const

constexpr MCRegUnit asMCRegUnit() const

constexpr Register asVirtualReg() const

The main class in the implementation of the target independent window scheduler.

std::pair< iterator, bool > insert(const ValueT &V)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

This class implements an extremely fast bulk output stream that can only output to a stream.

This provides a very simple, boring adaptor for a begin and end iterator into a range type.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

@ Valid

The data is already valid.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

DiagnosticInfoOptimizationBase::Argument NV

NodeAddr< DefNode * > Def

NodeAddr< PhiNode * > Phi

NodeAddr< UseNode * > Use

std::set< NodeId > NodeSet

friend class Instruction

Iterator for Instructions in a `BasicBlock.

BaseReg

Stack frame base register. Bit 0 of FREInfo.Info.

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

void stable_sort(R &&Range)

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

bool set_is_subset(const S1Ty &S1, const S2Ty &S2)

set_is_subset(A, B) - Return true iff A in B

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

constexpr int popcount(T Value) noexcept

Count the number of set bits in a value.

void erase(Container &C, ValueType V)

Wrapper function to remove a value from a container:

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

static int64_t computeDelta(SectionEntry *A, SectionEntry *B)

@ WS_Force

Use window algorithm after SMS algorithm fails.

@ WS_On

Turn off window algorithm.

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

cl::opt< bool > SwpEnableCopyToPhi

unsigned getRegState(const MachineOperand &RegOp)

Get all register state flags from machine operand RegOp.

auto lower_bound(R &&Range, T &&Value)

Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...

LLVM_ABI char & MachinePipelinerID

This pass performs software pipelining on machine instructions.

Definition MachinePipeliner.cpp:233

DWARFExpression::Operation Op

ArrayRef(const T &OneElt) -> ArrayRef< T >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

cl::opt< int > SwpForceIssueWidth

A command line argument to force pipeliner to use specified issue width.

@ Increment

Incrementally increasing token ID.

AAResults AliasAnalysis

Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.

LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)

This method is similar to getUnderlyingObject except that it can look through phi and select instruct...

LLVM_ABI bool isIdentifiedObject(const Value *V)

Return true if this pointer refers to a distinct and identifiable object.

LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

This class holds an SUnit corresponding to a memory operation and other information related to the in...

Definition MachinePipeliner.cpp:248

SUnit * SU

Definition MachinePipeliner.cpp:249

const Value * MemOpValue

The value of a memory operand.

Definition MachinePipeliner.cpp:253

SmallVector< const Value *, 2 > UnderlyingObjs

Definition MachinePipeliner.cpp:250

bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const

Definition MachinePipeliner.cpp:1019

AAMDNodes AATags

Definition MachinePipeliner.cpp:258

int64_t MemOpOffset

The offset of a memory operand.

Definition MachinePipeliner.cpp:256

bool IsAllIdentified

True if all the underlying objects are identified.

Definition MachinePipeliner.cpp:261

SUnitWithMemInfo(SUnit *SU)

Definition MachinePipeliner.cpp:1009

bool isUnknown() const

Definition MachinePipeliner.cpp:267

A collection of metadata nodes that might be associated with a memory access used by the alias-analys...

uint64_t FuncUnits

Bitmask representing a set of functional units.

static constexpr LaneBitmask getNone()

Represents loop-carried dependencies.

SmallSetVector< SUnit *, 8 > OrderDep

const OrderDep * getOrderDepOrNull(SUnit *Key) const

void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)

Adds some edges to the original DAG that correspond to loop-carried dependencies.

Definition MachinePipeliner.cpp:4371

void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const

Definition MachinePipeliner.cpp:4413

Define a kind of processor resource that will be modeled by the scheduler.

Summarize the scheduling resources required for an instruction of a particular scheduling class.

Machine model for scheduling, bundling, and heuristics.

const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const

bool hasInstrSchedModel() const

Does this machine model include instruction-level scheduling.

const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const

Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...

MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...

std::vector< unsigned > MaxSetPressure

Map of max reg pressure indexed by pressure set ID, not class ID.