LLVM: lib/CodeGen/MachinePipeliner.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

73#include "llvm/Config/llvm-config.h"

84#include

85#include

86#include

87#include

88#include

89#include

90#include

91#include

92#include

93#include

94#include

95#include

96#include

97#include

98

99using namespace llvm;

100

101#define DEBUG_TYPE "pipeliner"

102

103STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");

104STATISTIC(NumPipelined, "Number of loops software pipelined");

105STATISTIC(NumNodeOrderIssues, "Number of node order issues found");

106STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");

107STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");

108STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");

109STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");

110STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");

111STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");

112STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");

113STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");

114

115

117 cl::desc("Enable Software Pipelining"));

118

119

123

124

126 cl::desc("Size limit for the MII."),

128

129

130

132 cl::desc("Force pipeliner to use specified II."),

134

135

138 cl::desc("Maximum stages allowed in the generated scheduled."),

140

141

142

145 cl::desc("Prune dependences between unrelated Phi nodes."),

147

148

149

152 cl::desc("Prune loop carried order dependences."),

154

155#ifndef NDEBUG

157#endif

158

162

167

170 cl::desc("Instead of emitting the pipelined code, annotate instructions "

171 "with the generated schedule for feeding into the "

172 "-modulo-schedule-test pass"));

173

177 "Use the experimental peeling code generator for software pipelining"));

178

180 cl::desc("Range to search for II"),

182

185 cl::desc("Limit register pressure of scheduled loop"));

186

190 cl::desc("Margin representing the unused percentage of "

191 "the register pressure limit"));

192

195 cl::desc("Use the MVE code generator for software pipelining"));

196

197namespace llvm {

198

199

202 cl::desc("Enable CopyToPhi DAG Mutation"));

203

204

205

207 "pipeliner-force-issue-width",

208 cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,

210

211

214 cl::desc("Set how to use window scheduling algorithm."),

216 "Turn off window algorithm."),

218 "Use window algorithm after SMS algorithm fails."),

220 "Use window algorithm instead of SMS algorithm.")));

221

222}

223

224unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;

226#ifndef NDEBUG

228#endif

230

232 "Modulo Software Pipelining", false, false)

239

240

242 if (skipFunction(mf.getFunction()))

243 return false;

244

246 return false;

247

248 if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&

250 return false;

251

252 if (!mf.getSubtarget().enableMachinePipeliner())

253 return false;

254

255

256

257 if (mf.getSubtarget().useDFAforSMS() &&

258 (!mf.getSubtarget().getInstrItineraryData() ||

259 mf.getSubtarget().getInstrItineraryData()->isEmpty()))

260 return false;

261

262 MF = &mf;

263 MLI = &getAnalysis().getLI();

264 MDT = &getAnalysis().getDomTree();

265 ORE = &getAnalysis().getORE();

266 TII = MF->getSubtarget().getInstrInfo();

267 RegClassInfo.runOnMachineFunction(*MF);

268

269 for (const auto &L : *MLI)

270 scheduleLoop(*L);

271

272 return false;

273}

274

275

276

277

278

279bool MachinePipeliner::scheduleLoop(MachineLoop &L) {

280 bool Changed = false;

281 for (const auto &InnerLoop : L)

282 Changed |= scheduleLoop(*InnerLoop);

283

284#ifndef NDEBUG

285

287 if (Limit >= 0) {

289 return Changed;

291 }

292#endif

293

294 setPragmaPipelineOptions(L);

295 if (!canPipelineLoop(L)) {

296 LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");

299 L.getStartLoc(), L.getHeader())

300 << "Failed to pipeline loop";

301 });

302

304 return Changed;

305 }

306

307 ++NumTrytoPipeline;

308 if (useSwingModuloScheduler())

309 Changed = swingModuloScheduler(L);

310

311 if (useWindowScheduler(Changed))

312 Changed = runWindowScheduler(L);

313

315 return Changed;

316}

317

318void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {

319

322

324

325 if (LBLK == nullptr)

326 return;

327

329 if (BBLK == nullptr)

330 return;

331

333 if (TI == nullptr)

334 return;

335

337 if (LoopID == nullptr)

338 return;

339

342

344 MDNode *MD = dyn_cast(MDO);

345

346 if (MD == nullptr)

347 continue;

348

350

351 if (S == nullptr)

352 continue;

353

354 if (S->getString() == "llvm.loop.pipeline.initiationinterval") {

356 "Pipeline initiation interval hint metadata should have two operands.");

358 mdconst::extract(MD->getOperand(1))->getZExtValue();

359 assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");

360 } else if (S->getString() == "llvm.loop.pipeline.disable") {

362 }

363 }

364}

365

366

367

368

369bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {

370 if (L.getNumBlocks() != 1) {

373 L.getStartLoc(), L.getHeader())

374 << "Not a single basic block: "

375 << ore::NV("NumBlocks", L.getNumBlocks());

376 });

377 return false;

378 }

379

383 L.getStartLoc(), L.getHeader())

384 << "Disabled by Pragma.";

385 });

386 return false;

387 }

388

389

390

391 LI.TBB = nullptr;

392 LI.FBB = nullptr;

395 LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");

396 NumFailBranch++;

399 L.getStartLoc(), L.getHeader())

400 << "The branch can't be understood";

401 });

402 return false;

403 }

404

409 LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");

410 NumFailLoop++;

413 L.getStartLoc(), L.getHeader())

414 << "The loop structure is not supported";

415 });

416 return false;

417 }

418

419 if (L.getLoopPreheader()) {

420 LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");

421 NumFailPreheader++;

424 L.getStartLoc(), L.getHeader())

425 << "No loop preheader found";

426 });

427 return false;

428 }

429

430

431 preprocessPhiNodes(*L.getHeader());

432 return true;

433}

434

438 *getAnalysis().getLIS().getSlotIndexes();

439

443 auto *RC = MRI.getRegClass(DefOp.getReg());

444

445 for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {

448 continue;

449

450

451

452 Register NewReg = MRI.createVirtualRegister(RC);

460 RegOp.setReg(NewReg);

462 }

463 }

464}

465

466

467

468

469

470bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {

471 assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");

472

474 *this, L, getAnalysis().getLIS(), RegClassInfo,

476

478

479

480 SMS.startBlock(MBB);

481

482

483

487 I != E; ++I, --size)

488 ;

489

491 SMS.schedule();

492 SMS.exitRegion();

493

494 SMS.finishBlock();

495 return SMS.hasNewSchedule();

496}

497

507}

508

509bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {

511 Context.MF = MF;

514 Context.PassConfig = &getAnalysis();

515 Context.AA = &getAnalysis().getAAResults();

516 Context.LIS = &getAnalysis().getLIS();

519 return WS.run();

520}

521

522bool MachinePipeliner::useSwingModuloScheduler() {

523

525}

526

527bool MachinePipeliner::useWindowScheduler(bool Changed) {

528

529

530

531

533 LLVM_DEBUG(dbgs() << "Window scheduling is disabled when "

534 "llvm.loop.pipeline.initiationinterval is set.\n");

535 return false;

536 }

537

540}

541

542void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {

545 else if (II_setByPragma > 0)

546 MII = II_setByPragma;

547 else

548 MII = std::max(ResMII, RecMII);

549}

550

551void SwingSchedulerDAG::setMAX_II() {

554 else if (II_setByPragma > 0)

555 MAX_II = II_setByPragma;

556 else

558}

559

560

561

565 addLoopCarriedDependences(AA);

566 updatePhiDependences();

568 changeDependences();

569 postProcessDAG();

572

574 findCircuits(NodeSets);

576

577

578 unsigned ResMII = calculateResMII();

579 unsigned RecMII = calculateRecMII(NodeSets);

580

581 fuseRecs(NodeSets);

582

583

585 RecMII = 0;

586

587 setMII(ResMII, RecMII);

588 setMAX_II();

589

590 LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II

591 << " (rec=" << RecMII << ", res=" << ResMII << ")\n");

592

593

594 if (MII == 0) {

595 LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");

596 NumFailZeroMII++;

597 Pass.ORE->emit([&]() {

600 << "Invalid Minimal Initiation Interval: 0";

601 });

602 return;

603 }

604

605

608 << ", we don't pipeline large loops\n");

609 NumFailLargeMaxMII++;

610 Pass.ORE->emit([&]() {

613 << "Minimal Initiation Interval too large: "

614 << ore::NV("MII", (int)MII) << " > "

616 << "Refer to -pipeliner-max-mii.";

617 });

618 return;

619 }

620

621 computeNodeFunctions(NodeSets);

622

623 registerPressureFilter(NodeSets);

624

625 colocateNodeSets(NodeSets);

626

627 checkNodeSets(NodeSets);

628

630 for (auto &I : NodeSets) {

631 dbgs() << " Rec NodeSet ";

632 I.dump();

633 }

634 });

635

637

638 groupRemainingNodes(NodeSets);

639

640 removeDuplicateNodes(NodeSets);

641

643 for (auto &I : NodeSets) {

644 dbgs() << " NodeSet ";

645 I.dump();

646 }

647 });

648

649 computeNodeOrder(NodeSets);

650

651

652 checkValidNodeOrder(Circuits);

653

655 Scheduled = schedulePipeline(Schedule);

656

657 if (!Scheduled){

659 NumFailNoSchedule++;

660 Pass.ORE->emit([&]() {

663 << "Unable to find schedule";

664 });

665 return;

666 }

667

669

670 if (numStages == 0) {

671 LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");

672 NumFailZeroStage++;

673 Pass.ORE->emit([&]() {

676 << "No need to pipeline - no overlapped iterations in schedule.";

677 });

678 return;

679 }

680

683 << " : too many stages, abort\n");

684 NumFailLargeMaxStage++;

685 Pass.ORE->emit([&]() {

688 << "Too many stages in schedule: "

689 << ore::NV("numStages", (int)numStages) << " > "

691 << ". Refer to -pipeliner-max-stages.";

692 });

693 return;

694 }

695

696 Pass.ORE->emit([&]() {

699 << "Pipelined succesfully!";

700 });

701

702

704 std::vector<MachineInstr *> OrderedInsts;

708 OrderedInsts.push_back(SU->getInstr());

709 Cycles[SU->getInstr()] = Cycle;

710 Stages[SU->getInstr()] = Schedule.stageScheduled(SU);

711 }

712 }

714 for (auto &KV : NewMIs) {

715 Cycles[KV.first] = Cycles[KV.second];

716 Stages[KV.first] = Stages[KV.second];

717 NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];

718 }

719

721 std::move(Stages));

724 "Cannot serialize a schedule with InstrChanges!");

727 return;

728 }

729

738 } else {

742 }

743 ++NumPipelined;

744}

745

746

748 for (auto &KV : NewMIs)

750 NewMIs.clear();

751

752

754}

755

756

757

759 unsigned &InitVal, unsigned &LoopVal) {

760 assert(Phi.isPHI() && "Expecting a Phi.");

761

762 InitVal = 0;

763 LoopVal = 0;

764 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

765 if (Phi.getOperand(i + 1).getMBB() != Loop)

766 InitVal = Phi.getOperand(i).getReg();

767 else

768 LoopVal = Phi.getOperand(i).getReg();

769

770 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");

771}

772

773

776 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

777 if (Phi.getOperand(i + 1).getMBB() == LoopBB)

778 return Phi.getOperand(i).getReg();

779 return 0;

780}

781

782

787 while (!Worklist.empty()) {

789 for (const auto &SI : SU->Succs) {

790 SUnit *SuccSU = SI.getSUnit();

792 if (Visited.count(SuccSU))

793 continue;

794 if (SuccSU == SUb)

795 return true;

797 Visited.insert(SuccSU);

798 }

799 }

800 }

801 return false;

802}

803

804

805

807 return MI.isCall() || MI.mayRaiseFPException() ||

808 MI.hasUnmodeledSideEffects() ||

809 (MI.hasOrderedMemoryRef() &&

810 (MI.mayLoad() || MI.isDereferenceableInvariantLoad()));

811}

812

813

814

815

818 if (MI->hasOneMemOperand())

819 return;

822 return;

824 for (const Value *V : Objs) {

827 return;

828 }

829 }

830}

831

832

833

834

835

836void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {

840 for (auto &SU : SUnits) {

843 PendingLoads.clear();

844 else if (MI.mayLoad()) {

847 if (Objs.empty())

849 for (const auto *V : Objs) {

852 }

853 } else if (MI.mayStore()) {

856 if (Objs.empty())

858 for (const auto *V : Objs) {

860 PendingLoads.find(V);

861 if (I == PendingLoads.end())

862 continue;

863 for (auto *Load : I->second) {

865 continue;

867

868

869

871 int64_t Offset1, Offset2;

872 bool Offset1IsScalable, Offset2IsScalable;

874 Offset1IsScalable, TRI) &&

876 Offset2IsScalable, TRI)) {

878 Offset1IsScalable == Offset2IsScalable &&

879 (int)Offset1 < (int)Offset2) {

881 "What happened to the chain edge?");

883 Dep.setLatency(1);

884 SU.addPred(Dep);

885 continue;

886 }

887 }

888

889

890

891 if (!AA) {

893 Dep.setLatency(1);

894 SU.addPred(Dep);

895 continue;

896 }

901 Dep.setLatency(1);

902 SU.addPred(Dep);

903 continue;

904 }

908 Dep.setLatency(1);

909 SU.addPred(Dep);

910 continue;

911 }

917 Dep.setLatency(1);

918 SU.addPred(Dep);

919 }

920 }

921 }

922 }

923 }

924}

925

926

927

928

929

930

931

932void SwingSchedulerDAG::updatePhiDependences() {

935

936

938 RemoveDeps.clear();

939

940 unsigned HasPhiUse = 0;

941 unsigned HasPhiDef = 0;

943

945 if (!MO.isReg())

946 continue;

948 if (MO.isDef()) {

949

953 UI != UE; ++UI) {

956 if (SU != nullptr && UseMI->isPHI()) {

957 if (MI->isPHI()) {

959 Dep.setLatency(1);

960 I.addPred(Dep);

961 } else {

962 HasPhiDef = Reg;

963

964

965 if (SU->NodeNum < I.NodeNum && I.isPred(SU))

967 }

968 }

969 }

970 } else if (MO.isUse()) {

971

973 if (DefMI == nullptr)

974 continue;

976 if (SU != nullptr && DefMI->isPHI()) {

977 if (MI->isPHI()) {

979 Dep.setLatency(0);

980 ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,

982 I.addPred(Dep);

983 } else {

984 HasPhiUse = Reg;

985

986

987 if (SU->NodeNum < I.NodeNum && I.isPred(SU))

989 }

990 }

991 }

992 }

993

995 continue;

996 for (auto &PI : I.Preds) {

997 MachineInstr *PMI = PI.getSUnit()->getInstr();

999 if (I.getInstr()->isPHI()) {

1001 continue;

1003 continue;

1004 }

1006 }

1007 }

1008 for (const SDep &D : RemoveDeps)

1009 I.removePred(D);

1010 }

1011}

1012

1013

1014

1015void SwingSchedulerDAG::changeDependences() {

1016

1017

1018

1020 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;

1021 int64_t NewOffset = 0;

1022 if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,

1023 NewOffset))

1024 continue;

1025

1026

1027 Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();

1030 continue;

1032 if (!DefSU)

1033 continue;

1034

1036 if (!LastMI)

1037 continue;

1039 if (!LastSU)

1040 continue;

1041

1043 continue;

1044

1045

1047 for (const SDep &P : I.Preds)

1048 if (P.getSUnit() == DefSU)

1050 for (const SDep &D : Deps) {

1052 I.removePred(D);

1053 }

1054

1055 Deps.clear();

1056 for (auto &P : LastSU->Preds)

1057 if (P.getSUnit() == &I && P.getKind() == SDep::Order)

1058 Deps.push_back(P);

1059 for (const SDep &D : Deps) {

1062 }

1063

1064

1065

1069

1070

1071

1072 InstrChanges[&I] = std::make_pair(NewBase, NewOffset);

1073 }

1074}

1075

1076

1077

1078

1079

1080

1083 std::vector<MachineInstr *> &OrderedInsts,

1086

1087

1090 for (int Stage = 0, LastStage = Schedule.getMaxStageCount();

1091 Stage <= LastStage; ++Stage) {

1094 Instrs[Cycle].push_front(SU);

1095 }

1096 }

1097 }

1098

1101 std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];

1103 for (SUnit *SU : CycleInstrs) {

1105 OrderedInsts.push_back(MI);

1107 }

1108 }

1109}

1110

1111namespace {

1112

1113

1114

1115struct FuncUnitSorter {

1119

1121 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}

1122

1123

1124

1125

1126 unsigned minFuncUnits(const MachineInstr *Inst,

1129 unsigned min = UINT_MAX;

1130 if (InstrItins && !InstrItins->isEmpty()) {

1133 InstrItins->endStage(SchedClass))) {

1135 unsigned numAlternatives = llvm::popcount(funcUnits);

1136 if (numAlternatives < min) {

1137 min = numAlternatives;

1138 F = funcUnits;

1139 }

1140 }

1141 return min;

1142 }

1147

1148

1149 return min;

1150

1154 if (!PRE.ReleaseAtCycle)

1155 continue;

1158 unsigned NumUnits = ProcResource->NumUnits;

1159 if (NumUnits < min) {

1160 min = NumUnits;

1161 F = PRE.ProcResourceIdx;

1162 }

1163 }

1164 return min;

1165 }

1166 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

1167 }

1168

1169

1170

1171

1172

1173

1175 unsigned SchedClass = MI.getDesc().getSchedClass();

1176 if (InstrItins && !InstrItins->isEmpty()) {

1179 InstrItins->endStage(SchedClass))) {

1182 Resources[FuncUnits]++;

1183 }

1184 return;

1185 }

1190

1191

1192 return;

1193

1197 if (!PRE.ReleaseAtCycle)

1198 continue;

1199 Resources[PRE.ProcResourceIdx]++;

1200 }

1201 return;

1202 }

1203 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

1204 }

1205

1206

1209 unsigned MFUs1 = minFuncUnits(IS1, F1);

1210 unsigned MFUs2 = minFuncUnits(IS2, F2);

1211 if (MFUs1 == MFUs2)

1212 return Resources.lookup(F1) < Resources.lookup(F2);

1213 return MFUs1 > MFUs2;

1214 }

1215};

1216

1217

1218class HighRegisterPressureDetector {

1222

1223 const unsigned PSetNum;

1224

1225

1226

1227

1228

1229 std::vector InitSetPressure;

1230

1231

1232

1233 std::vector PressureSetLimit;

1234

1236

1238

1239public:

1240 using OrderedInstsTy = std::vector<MachineInstr *>;

1242

1243private:

1244 static void dumpRegisterPressures(const std::vector &Pressures) {

1245 if (Pressures.size() == 0) {

1246 dbgs() << "[]";

1247 } else {

1249 for (unsigned P : Pressures) {

1252 }

1253 dbgs() << ']';

1254 }

1255 }

1256

1259 for (auto PSetIter = MRI.getPressureSets(Reg); PSetIter.isValid();

1260 ++PSetIter) {

1261 dbgs() << *PSetIter << ' ';

1262 }

1263 dbgs() << '\n';

1264 }

1265

1266 void increaseRegisterPressure(std::vector &Pressure,

1268 auto PSetIter = MRI.getPressureSets(Reg);

1269 unsigned Weight = PSetIter.getWeight();

1270 for (; PSetIter.isValid(); ++PSetIter)

1271 Pressure[*PSetIter] += Weight;

1272 }

1273

1274 void decreaseRegisterPressure(std::vector &Pressure,

1276 auto PSetIter = MRI.getPressureSets(Reg);

1277 unsigned Weight = PSetIter.getWeight();

1278 for (; PSetIter.isValid(); ++PSetIter) {

1279 auto &P = Pressure[*PSetIter];

1281 "register pressure must be greater than or equal weight");

1282 P -= Weight;

1283 }

1284 }

1285

1286

1287 bool isReservedRegister(Register Reg) const {

1288 return Reg.isPhysical() && MRI.isReserved(Reg.asMCReg());

1289 }

1290

1291 bool isDefinedInThisLoop(Register Reg) const {

1292 return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;

1293 }

1294

1295

1296

1297

1298

1299

1300

1301

1302

1303 void computeLiveIn() {

1305 for (auto &MI : *OrigMBB) {

1306 if (MI.isDebugInstr())

1307 continue;

1308 for (auto &Use : ROMap[&MI].Uses) {

1309 auto Reg = Use.RegUnit;

1310

1311

1313 continue;

1314 if (isReservedRegister(Reg))

1315 continue;

1316 if (isDefinedInThisLoop(Reg))

1317 continue;

1319 }

1320 }

1321

1322 for (auto LiveIn : Used)

1323 increaseRegisterPressure(InitSetPressure, LiveIn);

1324 }

1325

1326

1328 for (unsigned PSet = 0; PSet < PSetNum; PSet++)

1330 }

1331

1332

1333

1334

1335

1336

1337

1338

1339

1340

1341

1342

1343 Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,

1344 Instr2StageTy &Stages) const {

1345

1346

1347

1348

1350 const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {

1351 if (isDefinedInThisLoop(Reg))

1353 };

1355 if (MI->isPHI()) {

1357 UpdateTargetRegs(Reg);

1358 } else {

1359 for (auto &Use : ROMap.find(MI)->getSecond().Uses)

1360 UpdateTargetRegs(Use.RegUnit);

1361 }

1362 }

1363

1364 const auto InstrScore = [&Stages](MachineInstr *MI) {

1365 return Stages[MI] + MI->isPHI();

1366 };

1367

1370 for (auto &Use : ROMap.find(MI)->getSecond().Uses) {

1371 auto Reg = Use.RegUnit;

1373 continue;

1375 if (!Inserted) {

1378 if (InstrScore(Orig) < InstrScore(New))

1379 Ite->second = New;

1380 }

1381 }

1382 }

1383

1384 Instr2LastUsesTy LastUses;

1385 for (auto &Entry : LastUseMI)

1386 LastUses[Entry.second].insert(Entry.first);

1387 return LastUses;

1388 }

1389

1390

1391

1392

1393

1394

1395

1396

1397

1398

1399

1400

1401

1402 std::vector

1403 computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,

1404 Instr2StageTy &Stages,

1405 const unsigned StageCount) const {

1407

1408

1409

1411

1412 auto CurSetPressure = InitSetPressure;

1413 auto MaxSetPressure = InitSetPressure;

1414 auto LastUses = computeLastUses(OrderedInsts, Stages);

1415

1417 dbgs() << "Ordered instructions:\n";

1419 dbgs() << "Stage " << Stages[MI] << ": ";

1421 }

1422 });

1423

1424 const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,

1426 if (Reg.isValid() || isReservedRegister(Reg))

1427 return;

1428

1429 bool Inserted = RegSet.insert(Reg).second;

1430 if (!Inserted)

1431 return;

1432

1434 increaseRegisterPressure(CurSetPressure, Reg);

1436 };

1437

1438 const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,

1440 if (Reg.isValid() || isReservedRegister(Reg))

1441 return;

1442

1443

1444 if (!RegSet.contains(Reg))

1445 return;

1446

1448 RegSet.erase(Reg);

1449 decreaseRegisterPressure(CurSetPressure, Reg);

1451 };

1452

1453 for (unsigned I = 0; I < StageCount; I++) {

1455 const auto Stage = Stages[MI];

1456 if (I < Stage)

1457 continue;

1458

1459 const unsigned Iter = I - Stage;

1460

1461 for (auto &Def : ROMap.find(MI)->getSecond().Defs)

1462 InsertReg(LiveRegSets[Iter], Def.RegUnit);

1463

1464 for (auto LastUse : LastUses[MI]) {

1465 if (MI->isPHI()) {

1466 if (Iter != 0)

1467 EraseReg(LiveRegSets[Iter - 1], LastUse);

1468 } else {

1469 EraseReg(LiveRegSets[Iter], LastUse);

1470 }

1471 }

1472

1473 for (unsigned PSet = 0; PSet < PSetNum; PSet++)

1474 MaxSetPressure[PSet] =

1475 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);

1476

1478 dbgs() << "CurSetPressure=";

1479 dumpRegisterPressures(CurSetPressure);

1480 dbgs() << " iter=" << Iter << " stage=" << Stage << ":";

1482 });

1483 }

1484 }

1485

1486 return MaxSetPressure;

1487 }

1488

1489public:

1492 : OrigMBB(OrigMBB), MRI(MF.getRegInfo()),

1493 TRI(MF.getSubtarget().getRegisterInfo()),

1494 PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),

1495 PressureSetLimit(PSetNum, 0) {}

1496

1497

1498

1501 if (MI.isDebugInstr())

1502 continue;

1503 ROMap[&MI].collect(MI, *TRI, MRI, false, true);

1504 }

1505

1506 computeLiveIn();

1507 computePressureSetLimit(RCI);

1508 }

1509

1510

1511

1513 const unsigned MaxStage) const {

1515 "the percentage of the margin must be between 0 to 100");

1516

1517 OrderedInstsTy OrderedInsts;

1518 Instr2StageTy Stages;

1520 const auto MaxSetPressure =

1521 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);

1522

1524 dbgs() << "Dump MaxSetPressure:\n";

1525 for (unsigned I = 0; I < MaxSetPressure.size(); I++) {

1526 dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);

1527 }

1528 dbgs() << '\n';

1529 });

1530

1531 for (unsigned PSet = 0; PSet < PSetNum; PSet++) {

1532 unsigned Limit = PressureSetLimit[PSet];

1534 LLVM_DEBUG(dbgs() << "PSet=" << PSet << " Limit=" << Limit

1535 << " Margin=" << Margin << "\n");

1536 if (Limit < MaxSetPressure[PSet] + Margin) {

1539 << "Rejected the schedule because of too high register pressure\n");

1540 return true;

1541 }

1542 }

1543 return false;

1544 }

1545};

1546

1547}

1548

1549

1550

1551

1552

1553

1554

1555unsigned SwingSchedulerDAG::calculateResMII() {

1558 return RM.calculateResMII();

1559}

1560

1561

1562

1563

1564

1565

1566

1567unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {

1568 unsigned RecMII = 0;

1569

1570 for (NodeSet &Nodes : NodeSets) {

1571 if (Nodes.empty())

1572 continue;

1573

1574 unsigned Delay = Nodes.getLatency();

1575 unsigned Distance = 1;

1576

1577

1578 unsigned CurMII = (Delay + Distance - 1) / Distance;

1579 Nodes.setRecMII(CurMII);

1580 if (CurMII > RecMII)

1581 RecMII = CurMII;

1582 }

1583

1584 return RecMII;

1585}

1586

1587

1588void SwingSchedulerDAG::Circuits::createAdjacencyStructure(

1592 for (int i = 0, e = SUnits.size(); i != e; ++i) {

1594

1595 for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {

1596

1597

1598 if (OE.isOutputDep()) {

1599 int N = OE.getDst()->NodeNum;

1600 int BackEdge = i;

1601 auto Dep = OutputDeps.find(BackEdge);

1602 if (Dep != OutputDeps.end()) {

1603 BackEdge = Dep->second;

1604 OutputDeps.erase(Dep);

1605 }

1606 OutputDeps[N] = BackEdge;

1607 }

1608

1609 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())

1610 continue;

1611

1612

1613

1614

1615

1616

1617

1618 if (OE.isAntiDep())

1619 continue;

1620

1621 int N = OE.getDst()->NodeNum;

1622 if (Added.test(N)) {

1623 AdjK[i].push_back(N);

1625 }

1626 }

1627

1628

1629 for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {

1630 SUnit *Src = IE.getSrc();

1631 SUnit *Dst = IE.getDst();

1632 if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE))

1633 continue;

1634 if (IE.isOrderDep() && Src->getInstr()->mayLoad()) {

1635 int N = Src->NodeNum;

1636 if (Added.test(N)) {

1637 AdjK[i].push_back(N);

1639 }

1640 }

1641 }

1642 }

1643

1644 for (auto &OD : OutputDeps)

1645 if (Added.test(OD.second)) {

1646 AdjK[OD.first].push_back(OD.second);

1647 Added.set(OD.second);

1648 }

1649}

1650

1651

1652

1653bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,

1655 bool HasBackedge) {

1656 SUnit *SV = &SUnits[V];

1657 bool F = false;

1658 Stack.insert(SV);

1659 Blocked.set(V);

1660

1661 for (auto W : AdjK[V]) {

1662 if (NumPaths > MaxPaths)

1663 break;

1664 if (W < S)

1665 continue;

1666 if (W == S) {

1667 if (!HasBackedge)

1669 F = true;

1670 ++NumPaths;

1671 break;

1672 }

1673 if (!Blocked.test(W)) {

1674 if (circuit(W, S, NodeSets, DAG,

1675 Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))

1676 F = true;

1677 }

1678 }

1679

1680 if (F)

1681 unblock(V);

1682 else {

1683 for (auto W : AdjK[V]) {

1684 if (W < S)

1685 continue;

1686 B[W].insert(SV);

1687 }

1688 }

1689 Stack.pop_back();

1690 return F;

1691}

1692

1693

1694void SwingSchedulerDAG::Circuits::unblock(int U) {

1695 Blocked.reset(U);

1697 while (!BU.empty()) {

1699 assert(SI != BU.end() && "Invalid B set.");

1702 if (Blocked.test(W->NodeNum))

1703 unblock(W->NodeNum);

1704 }

1705}

1706

1707

1708

1709void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {

1710 Circuits Cir(SUnits, Topo);

1711

1712 Cir.createAdjacencyStructure(this);

1713 for (int I = 0, E = SUnits.size(); I != E; ++I) {

1714 Cir.reset();

1715 Cir.circuit(I, I, NodeSets, this);

1716 }

1717}

1718

1719

1720

1721

1722

1723

1724

1725

1726

1727

1728

1729

1730

1731

1732

1733

1734

1735

1736

1737void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {

1739

1741 continue;

1742

1743

1745

1747

1748 for (auto &Dep : SU.Preds) {

1749 SUnit *TmpSU = Dep.getSUnit();

1752

1755

1756

1759 }

1760

1761 if (PHISUs.size() == 0 || SrcSUs.size() == 0)

1762 continue;

1763

1764

1765

1767

1768 for (size_t Index = 0; Index < PHISUs.size(); ++Index) {

1769 for (auto &Dep : PHISUs[Index]->Succs) {

1771 continue;

1772

1773 SUnit *TmpSU = Dep.getSUnit();

1777 continue;

1778 }

1780 }

1781 }

1782

1783 if (UseSUs.size() == 0)

1784 continue;

1785

1787

1788 for (auto *I : UseSUs) {

1789 for (auto *Src : SrcSUs) {

1790 if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {

1793 }

1794 }

1795 }

1796 }

1797}

1798

1799

1800

1801

1802

1803

1804

1805void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {

1806 ScheduleInfo.resize(SUnits.size());

1807

1809 for (int I : Topo) {

1812 }

1813 });

1814

1815 int maxASAP = 0;

1816

1817 for (int I : Topo) {

1818 int asap = 0;

1819 int zeroLatencyDepth = 0;

1821 for (const auto &IE : DDG->getInEdges(SU)) {

1822 SUnit *Pred = IE.getSrc();

1823 if (IE.getLatency() == 0)

1824 zeroLatencyDepth =

1826 if (IE.ignoreDependence(true))

1827 continue;

1828 asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() -

1829 IE.getDistance() * MII));

1830 }

1831 maxASAP = std::max(maxASAP, asap);

1832 ScheduleInfo[I].ASAP = asap;

1833 ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;

1834 }

1835

1836

1838 int alap = maxASAP;

1839 int zeroLatencyHeight = 0;

1841 for (const auto &OE : DDG->getOutEdges(SU)) {

1842 SUnit *Succ = OE.getDst();

1844 continue;

1845 if (OE.getLatency() == 0)

1846 zeroLatencyHeight =

1848 if (OE.ignoreDependence(true))

1849 continue;

1850 alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() +

1851 OE.getDistance() * MII));

1852 }

1853

1854 ScheduleInfo[I].ALAP = alap;

1855 ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;

1856 }

1857

1858

1860 I.computeNodeSetInfo(this);

1861

1863 for (unsigned i = 0; i < SUnits.size(); i++) {

1864 dbgs() << "\tNode " << i << ":\n";

1872 }

1873 });

1874}

1875

1876

1877

1878

1881 const NodeSet *S = nullptr) {

1883

1885 for (const auto &IE : DDG->getInEdges(SU)) {

1886 SUnit *PredSU = IE.getSrc();

1887 if (S && S->count(PredSU) == 0)

1888 continue;

1889 if (IE.ignoreDependence(true))

1890 continue;

1891 if (NodeOrder.count(PredSU) == 0)

1892 Preds.insert(PredSU);

1893 }

1894

1895

1896

1897

1898

1899 for (const auto &OE : DDG->getOutEdges(SU)) {

1900 SUnit *SuccSU = OE.getDst();

1901 if (!OE.isAntiDep())

1902 continue;

1903 if (S && S->count(SuccSU) == 0)

1904 continue;

1905 if (NodeOrder.count(SuccSU) == 0)

1906 Preds.insert(SuccSU);

1907 }

1908 }

1909 return !Preds.empty();

1910}

1911

1912

1913

1914

1917 const NodeSet *S = nullptr) {

1919

1921 for (const auto &OE : DDG->getOutEdges(SU)) {

1922 SUnit *SuccSU = OE.getDst();

1923 if (S && S->count(SuccSU) == 0)

1924 continue;

1925 if (OE.ignoreDependence(false))

1926 continue;

1927 if (NodeOrder.count(SuccSU) == 0)

1928 Succs.insert(SuccSU);

1929 }

1930

1931

1932

1933

1934

1935 for (const auto &IE : DDG->getInEdges(SU)) {

1936 SUnit *PredSU = IE.getSrc();

1937 if (!IE.isAntiDep())

1938 continue;

1939 if (S && S->count(PredSU) == 0)

1940 continue;

1941 if (NodeOrder.count(PredSU) == 0)

1942 Succs.insert(PredSU);

1943 }

1944 }

1945 return !Succs.empty();

1946}

1947

1948

1949

1956 return false;

1958 return false;

1959 if (DestNodes.contains(Cur))

1960 return true;

1961 if (!Visited.insert(Cur).second)

1962 return Path.contains(Cur);

1963 bool FoundPath = false;

1964 for (const auto &OE : DDG->getOutEdges(Cur))

1965 if (!OE.ignoreDependence(false))

1966 FoundPath |=

1967 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);

1968 for (const auto &IE : DDG->getInEdges(Cur))

1969 if (IE.isAntiDep() && IE.getDistance() == 0)

1970 FoundPath |=

1971 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);

1972 if (FoundPath)

1973 Path.insert(Cur);

1974 return FoundPath;

1975}

1976

1977

1978

1979

1986 for (SUnit *SU : NS) {

1988 if (MI->isPHI())

1989 continue;

1992 if (Reg.isVirtual())

1993 Uses.insert(Reg);

1994 else if (MRI.isAllocatable(Reg))

1995 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))

1996 Uses.insert(Unit);

1997 }

1998 }

1999 for (SUnit *SU : NS)

2001 if (!MO.isDead()) {

2003 if (Reg.isVirtual()) {

2004 if (Uses.count(Reg))

2006 } else if (MRI.isAllocatable(Reg)) {

2007 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))

2008 if (Uses.count(Unit))

2010 }

2011 }

2013}

2014

2015

2016

2017void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {

2018 for (auto &NS : NodeSets) {

2019

2020 if (NS.size() <= 2)

2021 continue;

2024 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);

2026 RecRPTracker.closeBottom();

2027

2028 std::vector<SUnit *> SUnits(NS.begin(), NS.end());

2030 return A->NodeNum > B->NodeNum;

2031 });

2032

2033 for (auto &SU : SUnits) {

2034

2035

2036

2037

2039 RecRPTracker.setPos(std::next(CurInstI));

2040

2043 RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,

2044 CriticalPSets,

2048 dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "

2051 NS.setExceedPressure(SU);

2052 break;

2053 }

2054 RecRPTracker.recede();

2055 }

2056 }

2057}

2058

2059

2060

2061void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {

2062 unsigned Colocate = 0;

2063 for (int i = 0, e = NodeSets.size(); i < e; ++i) {

2064 NodeSet &N1 = NodeSets[i];

2067 continue;

2068 for (int j = i + 1; j < e; ++j) {

2071 continue;

2073 if (N2.empty() || succ\_L(N2, S2, DDG.get()))

2074 continue;

2078 break;

2079 }

2080 }

2081 }

2082}

2083

2084

2085

2086

2087

2088

2089void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {

2090

2091 if (MII < 17)

2092 return;

2093

2094 for (auto &NS : NodeSets) {

2095 if (NS.getRecMII() > 2)

2096 return;

2097 if (NS.getMaxDepth() > MII)

2098 return;

2099 }

2100 NodeSets.clear();

2101 LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");

2102}

2103

2104

2105

2106void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {

2109

2110

2111 for (NodeSet &I : NodeSets) {

2113

2114 if (succ_L(I, N, DDG.get())) {

2116 for (SUnit *NI : N) {

2117 Visited.clear();

2118 computePath(NI, Path, NodesAdded, I, Visited, DDG.get());

2119 }

2120 if (Path.empty())

2121 I.insert(Path.begin(), Path.end());

2122 }

2123

2124 N.clear();

2125 if (succ_L(NodesAdded, N, DDG.get())) {

2127 for (SUnit *NI : N) {

2128 Visited.clear();

2129 computePath(NI, Path, I, NodesAdded, Visited, DDG.get());

2130 }

2131 if (Path.empty())

2132 I.insert(Path.begin(), Path.end());

2133 }

2134 NodesAdded.insert(I.begin(), I.end());

2135 }

2136

2137

2138

2141 if (succ_L(NodesAdded, N, DDG.get()))

2143 addConnectedNodes(I, NewSet, NodesAdded);

2144 if (!NewSet.empty())

2145 NodeSets.push_back(NewSet);

2146

2147

2148

2150 if (pred_L(NodesAdded, N, DDG.get()))

2152 addConnectedNodes(I, NewSet, NodesAdded);

2153 if (!NewSet.empty())

2154 NodeSets.push_back(NewSet);

2155

2156

2157

2159 if (NodesAdded.count(&SU) == 0) {

2161 addConnectedNodes(&SU, NewSet, NodesAdded);

2162 if (!NewSet.empty())

2163 NodeSets.push_back(NewSet);

2164 }

2165 }

2166}

2167

2168

2169void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,

2172 NodesAdded.insert(SU);

2173 for (auto &OE : DDG->getOutEdges(SU)) {

2175 if (!OE.isArtificial() && Successor->isBoundaryNode() &&

2177 addConnectedNodes(Successor, NewSet, NodesAdded);

2178 }

2179 for (auto &IE : DDG->getInEdges(SU)) {

2180 SUnit *Predecessor = IE.getSrc();

2181 if (IE.isArtificial() && NodesAdded.count(Predecessor) == 0)

2182 addConnectedNodes(Predecessor, NewSet, NodesAdded);

2183 }

2184}

2185

2186

2187

2190 Result.clear();

2191 for (SUnit *SU : Set1) {

2192 if (Set2.count(SU) != 0)

2193 Result.insert(SU);

2194 }

2195 return !Result.empty();

2196}

2197

2198

2199void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {

2201 ++I) {

2208 for (SUnit *SU : *J)

2209 I->insert(SU);

2210 NodeSets.erase(J);

2211 E = NodeSets.end();

2212 } else {

2213 ++J;

2214 }

2215 }

2216 }

2217}

2218

2219

2220void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {

2222 ++I)

2224 J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });

2225

2226 if (J->empty()) {

2227 NodeSets.erase(J);

2228 E = NodeSets.end();

2229 } else {

2230 ++J;

2231 }

2232 }

2233}

2234

2235

2236

2237

2238

2239void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {

2241 NodeOrder.clear();

2242

2243 for (auto &Nodes : NodeSets) {

2244 LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");

2245 OrderKind Order;

2248 R.insert(N.begin(), N.end());

2249 Order = BottomUp;

2251 } else if (succ_L(NodeOrder, N, DDG.get()) &&

2253 R.insert(N.begin(), N.end());

2254 Order = TopDown;

2257

2258

2259 Order = TopDown;

2261 } else if (NodeSets.size() == 1) {

2262 for (const auto &N : Nodes)

2263 if (N->Succs.size() == 0)

2264 R.insert(N);

2265 Order = BottomUp;

2267 } else {

2268

2269 SUnit *maxASAP = nullptr;

2270 for (SUnit *SU : Nodes) {

2271 if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||

2273 maxASAP = SU;

2274 }

2275 R.insert(maxASAP);

2276 Order = BottomUp;

2278 }

2279

2280 while (R.empty()) {

2281 if (Order == TopDown) {

2282

2283

2284

2285 while (R.empty()) {

2286 SUnit *maxHeight = nullptr;

2287 for (SUnit *I : R) {

2289 maxHeight = I;

2292 maxHeight = I;

2297 maxHeight = I;

2298 }

2299 NodeOrder.insert(maxHeight);

2301 R.remove(maxHeight);

2302 for (const auto &OE : DDG->getOutEdges(maxHeight)) {

2303 SUnit *SU = OE.getDst();

2304 if (Nodes.count(SU) == 0)

2305 continue;

2306 if (NodeOrder.contains(SU))

2307 continue;

2308 if (OE.ignoreDependence(false))

2309 continue;

2310 R.insert(SU);

2311 }

2312

2313

2314

2315

2316

2317 for (const auto &IE : DDG->getInEdges(maxHeight)) {

2318 SUnit *SU = IE.getSrc();

2319 if (IE.isAntiDep())

2320 continue;

2321 if (Nodes.count(SU) == 0)

2322 continue;

2323 if (NodeOrder.contains(SU))

2324 continue;

2325 R.insert(SU);

2326 }

2327 }

2328 Order = BottomUp;

2329 LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");

2331 if (pred_L(NodeOrder, N, DDG.get(), &Nodes))

2332 R.insert(N.begin(), N.end());

2333 } else {

2334

2335

2336

2337 while (R.empty()) {

2338 SUnit *maxDepth = nullptr;

2339 for (SUnit *I : R) {

2341 maxDepth = I;

2344 maxDepth = I;

2348 maxDepth = I;

2349 }

2350 NodeOrder.insert(maxDepth);

2352 R.remove(maxDepth);

2353 if (Nodes.isExceedSU(maxDepth)) {

2354 Order = TopDown;

2355 R.clear();

2356 R.insert(Nodes.getNode(0));

2357 break;

2358 }

2359 for (const auto &IE : DDG->getInEdges(maxDepth)) {

2360 SUnit *SU = IE.getSrc();

2361 if (Nodes.count(SU) == 0)

2362 continue;

2363 if (NodeOrder.contains(SU))

2364 continue;

2365 R.insert(SU);

2366 }

2367

2368

2369

2370

2371

2372 for (const auto &OE : DDG->getOutEdges(maxDepth)) {

2373 SUnit *SU = OE.getDst();

2374 if (!OE.isAntiDep())

2375 continue;

2376 if (Nodes.count(SU) == 0)

2377 continue;

2378 if (NodeOrder.contains(SU))

2379 continue;

2380 R.insert(SU);

2381 }

2382 }

2383 Order = TopDown;

2384 LLVM_DEBUG(dbgs() << "\n Switching order to top down ");

2386 if (succ_L(NodeOrder, N, DDG.get(), &Nodes))

2387 R.insert(N.begin(), N.end());

2388 }

2389 }

2391 }

2392

2394 dbgs() << "Node order: ";

2395 for (SUnit *I : NodeOrder)

2396 dbgs() << " " << I->NodeNum << " ";

2397 dbgs() << "\n";

2398 });

2399}

2400

2401

2402

2403bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {

2404

2405 if (NodeOrder.empty()){

2406 LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );

2407 return false;

2408 }

2409

2410 bool scheduleFound = false;

2411 std::unique_ptr HRPDetector;

2413 HRPDetector =

2414 std::make_unique(Loop.getHeader(), MF);

2415 HRPDetector->init(RegClassInfo);

2416 }

2417

2418 for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {

2419 Schedule.reset();

2421 LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");

2422

2425 do {

2426 SUnit *SU = *NI;

2427

2428

2429

2430 int EarlyStart = INT_MIN;

2431 int LateStart = INT_MAX;

2432 Schedule.computeStart(SU, &EarlyStart, &LateStart, II, this);

2434 dbgs() << "\n";

2435 dbgs() << "Inst (" << SU->NodeNum << ") ";

2437 dbgs() << "\n";

2438 });

2440 dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));

2441

2442 if (EarlyStart > LateStart)

2443 scheduleFound = false;

2444 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)

2445 scheduleFound =

2446 Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);

2447 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)

2448 scheduleFound =

2449 Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);

2450 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {

2451 LateStart = std::min(LateStart, EarlyStart + (int)II - 1);

2452

2453

2454

2455

2456

2457

2460 scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);

2461 else

2462 scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);

2463 } else {

2465 scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),

2467 }

2468

2469

2470

2471 if (scheduleFound)

2474 scheduleFound = false;

2475

2477 if (!scheduleFound)

2478 dbgs() << "\tCan't schedule\n";

2479 });

2480 } while (++NI != NE && scheduleFound);

2481

2482

2483 if (scheduleFound)

2484 scheduleFound =

2486

2487

2488 if (scheduleFound)

2490

2491

2492

2494 scheduleFound =

2495 !HRPDetector->detect(this, Schedule, Schedule.getMaxStageCount());

2496 }

2497

2498 LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound

2500 << ")\n");

2501

2502 if (scheduleFound) {

2503 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);

2504 if (!scheduleFound)

2506 }

2507

2508 if (scheduleFound) {

2510 Pass.ORE->emit([&]() {

2513 << "Schedule found with Initiation Interval: "

2515 << ", MaxStageCount: "

2517 });

2518 } else

2519 Schedule.reset();

2520

2522}

2523

2524

2525

2526bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {

2530 bool OffsetIsScalable;

2532 return false;

2533

2534

2535 if (OffsetIsScalable)

2536 return false;

2537

2538 if (!BaseOp->isReg())

2539 return false;

2540

2542

2544

2546 if (BaseDef && BaseDef->isPHI()) {

2549 }

2550 if (!BaseDef)

2551 return false;

2552

2553 int D = 0;

2555 return false;

2556

2557 Delta = D;

2558 return true;

2559}

2560

2561

2562

2563

2564

2565

2566

2567

2568bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,

2569 unsigned &BasePos,

2570 unsigned &OffsetPos,

2571 unsigned &NewBase,

2573

2575 return false;

2576 unsigned BasePosLd, OffsetPosLd;

2578 return false;

2579 Register BaseReg = MI->getOperand(BasePosLd).getReg();

2580

2581

2584 if (!Phi || Phi->isPHI())

2585 return false;

2586

2587 unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());

2588 if (!PrevReg)

2589 return false;

2590

2591

2593 if (!PrevDef || PrevDef == MI)

2594 return false;

2595

2597 return false;

2598

2599 unsigned BasePos1 = 0, OffsetPos1 = 0;

2601 return false;

2602

2603

2604

2605 int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();

2606 int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();

2608 NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);

2611 if (!Disjoint)

2612 return false;

2613

2614

2615 BasePos = BasePosLd;

2616 OffsetPos = OffsetPosLd;

2617 NewBase = PrevReg;

2618 Offset = StoreOffset;

2619 return true;

2620}

2621

2622

2623

2628 InstrChanges.find(SU);

2629 if (It != InstrChanges.end()) {

2630 std::pair<unsigned, int64_t> RegAndOffset = It->second;

2631 unsigned BasePos, OffsetPos;

2633 return;

2634 Register BaseReg = MI->getOperand(BasePos).getReg();

2635 MachineInstr *LoopDef = findDefInLoop(BaseReg);

2640 if (BaseStageNum < DefStageNum) {

2642 int OffsetDiff = DefStageNum - BaseStageNum;

2643 if (DefCycleNum < BaseCycleNum) {

2645 if (OffsetDiff > 0)

2646 --OffsetDiff;

2647 }

2648 int64_t NewOffset =

2649 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;

2653 NewMIs[MI] = NewMI;

2654 }

2655 }

2656}

2657

2658

2659

2660

2664 while (Def->isPHI()) {

2665 if (!Visited.insert(Def).second)

2666 break;

2667 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)

2668 if (Def->getOperand(i + 1).getMBB() == BB) {

2669 Def = MRI.getVRegDef(Def->getOperand(i).getReg());

2670 break;

2671 }

2672 }

2673 return Def;

2674}

2675

2676

2677

2678

2683 return false;

2684

2686 return true;

2687

2689 return true;

2690

2693 assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");

2694

2695

2699 return true;

2700

2702 return false;

2703

2704

2705

2706

2707 unsigned DeltaS, DeltaD;

2708 if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))

2709 return true;

2710

2712 int64_t OffsetS, OffsetD;

2713 bool OffsetSIsScalable, OffsetDIsScalable;

2719 return true;

2720

2721 assert(!OffsetSIsScalable && !OffsetDIsScalable &&

2722 "Expected offsets to be byte offsets");

2723

2726 if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())

2727 return true;

2728

2729 unsigned InitValS = 0;

2730 unsigned LoopValS = 0;

2731 unsigned InitValD = 0;

2732 unsigned LoopValD = 0;

2737

2739 return true;

2740

2741

2742

2744 int D = 0;

2746 return true;

2747

2750

2751

2752

2754 return true;

2755

2756 if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() ||

2757 DeltaD < AccessSizeD.getValue())

2758 return true;

2759

2760 return (OffsetS + (int64_t)AccessSizeS.getValue() <

2761 OffsetD + (int64_t)AccessSizeD.getValue());

2762}

2763

2764void SwingSchedulerDAG::postProcessDAG() {

2765 for (auto &M : Mutations)

2766 M->apply(this);

2767}

2768

2769

2770

2771

2772

2773

2775 bool forward = true;

2777 dbgs() << "Trying to insert node between " << StartCycle << " and "

2778 << EndCycle << " II: " << II << "\n";

2779 });

2780 if (StartCycle > EndCycle)

2781 forward = false;

2782

2783

2784 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;

2785 for (int curCycle = StartCycle; curCycle != termCycle;

2786 forward ? ++curCycle : --curCycle) {

2787

2788 if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||

2791 dbgs() << "\tinsert at cycle " << curCycle << " ";

2793 });

2794

2795 if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))

2796 ProcItinResources.reserveResources(*SU, curCycle);

2797 ScheduledInstrs[curCycle].push_back(SU);

2798 InstrToCycle.insert(std::make_pair(SU, curCycle));

2799 if (curCycle > LastCycle)

2800 LastCycle = curCycle;

2801 if (curCycle < FirstCycle)

2802 FirstCycle = curCycle;

2803 return true;

2804 }

2806 dbgs() << "\tfailed to insert at cycle " << curCycle << " ";

2808 });

2809 }

2810 return false;

2811}

2812

2813

2819 int EarlyCycle = INT_MAX;

2820 while (!Worklist.empty()) {

2823 if (Visited.count(PrevSU))

2824 continue;

2825 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);

2826 if (it == InstrToCycle.end())

2827 continue;

2828 EarlyCycle = std::min(EarlyCycle, it->second);

2829 for (const auto &IE : DDG->getInEdges(PrevSU))

2830 if (IE.isOrderDep() || IE.isOutputDep())

2832 Visited.insert(PrevSU);

2833 }

2834 return EarlyCycle;

2835}

2836

2837

2843 int LateCycle = INT_MIN;

2844 while (!Worklist.empty()) {

2848 continue;

2849 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);

2850 if (it == InstrToCycle.end())

2851 continue;

2852 LateCycle = std::max(LateCycle, it->second);

2853 for (const auto &OE : DDG->getOutEdges(SuccSU))

2854 if (OE.isOrderDep() || OE.isOutputDep())

2856 Visited.insert(SuccSU);

2857 }

2858 return LateCycle;

2859}

2860

2861

2862

2863

2865 for (auto &P : SU->Preds)

2866 if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI())

2867 for (auto &S : P.getSUnit()->Succs)

2868 if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())

2869 return P.getSUnit();

2870 return nullptr;

2871}

2872

2873

2874

2878

2879

2880

2881

2882 for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {

2884 for (const auto &IE : DDG->getInEdges(SU)) {

2885 if (IE.getSrc() == I) {

2886

2887

2890 *MinLateStart = std::min(*MinLateStart, End);

2891 }

2892 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II;

2893 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);

2894 }

2895 }

2896

2897 for (const auto &OE : DDG->getOutEdges(SU)) {

2898 if (OE.getDst() == I) {

2899

2900

2903 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);

2904 }

2905 int LateStart = cycle - OE.getLatency() + OE.getDistance() * II;

2906 *MinLateStart = std::min(*MinLateStart, LateStart);

2907 }

2908 }

2909

2911 for (const auto &Dep : SU->Preds) {

2912

2913

2914 if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&

2916 *MinLateStart = std::min(*MinLateStart, cycle);

2917 }

2918 }

2919 }

2920}

2921

2922

2923

2924

2926 std::deque<SUnit *> &Insts) const {

2928 bool OrderBeforeUse = false;

2929 bool OrderAfterDef = false;

2930 bool OrderBeforeDef = false;

2931 unsigned MoveDef = 0;

2932 unsigned MoveUse = 0;

2935

2936 unsigned Pos = 0;

2937 for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;

2938 ++I, ++Pos) {

2940 if (!MO.isReg() || !MO.getReg().isVirtual())

2941 continue;

2942

2944 unsigned BasePos, OffsetPos;

2945 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))

2946 if (MI->getOperand(BasePos).getReg() == Reg)

2948 Reg = NewReg;

2950 std::tie(Reads, Writes) =

2951 (*I)->getInstr()->readsWritesVirtualRegister(Reg);

2952 if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {

2953 OrderBeforeUse = true;

2954 if (MoveUse == 0)

2955 MoveUse = Pos;

2956 } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {

2957

2958 OrderAfterDef = true;

2959 MoveDef = Pos;

2962 OrderBeforeUse = true;

2963 if (MoveUse == 0)

2964 MoveUse = Pos;

2965 } else {

2966 OrderAfterDef = true;

2967 MoveDef = Pos;

2968 }

2970 OrderBeforeUse = true;

2971 if (MoveUse == 0)

2972 MoveUse = Pos;

2973 if (MoveUse != 0) {

2974 OrderAfterDef = true;

2975 MoveDef = Pos - 1;

2976 }

2978

2979 OrderBeforeUse = true;

2980 if (MoveUse == 0)

2981 MoveUse = Pos;

2982 } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&

2984 if (MoveUse == 0) {

2985 OrderBeforeDef = true;

2986 MoveUse = Pos;

2987 }

2988 }

2989 }

2990

2991

2993 if (OE.getDst() != *I)

2994 continue;

2995 if (OE.isOrderDep() && stageScheduled(*I) == StageInst1) {

2996 OrderBeforeUse = true;

2997 if (Pos < MoveUse)

2998 MoveUse = Pos;

2999 }

3000

3001

3002

3003 else if ((OE.isAntiDep() || OE.isOutputDep()) &&

3005 OrderBeforeUse = true;

3006 if ((MoveUse == 0) || (Pos < MoveUse))

3007 MoveUse = Pos;

3008 }

3009 }

3010 for (auto &IE : DDG->getInEdges(SU)) {

3011 if (IE.getSrc() != *I)

3012 continue;

3013 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&

3015 OrderAfterDef = true;

3016 MoveDef = Pos;

3017 }

3018 }

3019 }

3020

3021

3022 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)

3023 OrderBeforeUse = false;

3024

3025

3026

3027 if (OrderBeforeDef)

3028 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);

3029

3030

3031

3032 if (OrderBeforeUse && OrderAfterDef) {

3033 SUnit *UseSU = Insts.at(MoveUse);

3034 SUnit *DefSU = Insts.at(MoveDef);

3035 if (MoveUse > MoveDef) {

3036 Insts.erase(Insts.begin() + MoveUse);

3037 Insts.erase(Insts.begin() + MoveDef);

3038 } else {

3039 Insts.erase(Insts.begin() + MoveDef);

3040 Insts.erase(Insts.begin() + MoveUse);

3041 }

3045 return;

3046 }

3047

3048

3049 if (OrderBeforeUse)

3050 Insts.push_front(SU);

3051 else

3052 Insts.push_back(SU);

3053}

3054

3055

3058 if (!Phi.isPHI())

3059 return false;

3060 assert(Phi.isPHI() && "Expecting a Phi.");

3064

3065 unsigned InitVal = 0;

3066 unsigned LoopVal = 0;

3067 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);

3069 if (!UseSU)

3070 return true;

3072 return true;

3075 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);

3076}

3077

3078

3079

3080

3081

3082

3083

3084

3088 if (!MO.isReg())

3089 return false;

3090 if (Def->isPHI())

3091 return false;

3093 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())

3094 return false;

3096 return false;

3097 unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());

3099 if (DMO.getReg() == LoopReg)

3100 return true;

3101 }

3102 return false;

3103}

3104

3105

3106

3109 for (const auto &IE : DDG->getInEdges(SU))

3110 if (InstrToCycle.count(IE.getSrc()))

3111 return false;

3112 return true;

3113}

3114

3115

3120

3121 for (auto &SU : SSD->SUnits)

3124

3126 while (!Worklist.empty()) {

3128 if (DoNotPipeline.count(SU))

3129 continue;

3131 DoNotPipeline.insert(SU);

3132 for (const auto &IE : DDG->getInEdges(SU))

3133 Worklist.push_back(IE.getSrc());

3134

3135

3136

3137 for (const auto &OE : DDG->getOutEdges(SU))

3138 if (OE.getDistance() == 1)

3139 Worklist.push_back(OE.getDst());

3140 }

3141 return DoNotPipeline;

3142}

3143

3144

3145

3149

3150 int NewLastCycle = INT_MIN;

3153 continue;

3155 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);

3156 continue;

3157 }

3158

3159

3162 if (IE.getDistance() == 0)

3163 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);

3164

3165

3166

3168 if (OE.getDistance() == 1)

3169 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);

3170

3171 int OldCycle = InstrToCycle[&SU];

3172 if (OldCycle != NewCycle) {

3173 InstrToCycle[&SU] = NewCycle;

3178 << ") is not pipelined; moving from cycle " << OldCycle

3179 << " to " << NewCycle << " Instr:" << *SU.getInstr());

3180 }

3181 NewLastCycle = std::max(NewLastCycle, NewCycle);

3182 }

3183 LastCycle = NewLastCycle;

3184 return true;

3185}

3186

3187

3188

3189

3190

3191

3192

3193

3194

3198 continue;

3200 int CycleDef = InstrToCycle[&SU];

3201 assert(StageDef != -1 && "Instruction should have been scheduled.");

3203 SUnit *Dst = OE.getDst();

3204 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())

3205 if (OE.getReg().isPhysical()) {

3207 return false;

3208 if (InstrToCycle[Dst] <= CycleDef)

3209 return false;

3210 }

3211 }

3212 }

3213 return true;

3214}

3215

3216

3217

3218

3219

3220

3221

3222

3223

3224

3225

3226void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {

3227

3228

3229 typedef std::pair<SUnit *, unsigned> UnitIndex;

3230 std::vector Indices(NodeOrder.size(), std::make_pair(nullptr, 0));

3231

3232 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)

3233 Indices.push_back(std::make_pair(NodeOrder[i], i));

3234

3235 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {

3236 return std::get<0>(i1) < std::get<0>(i2);

3237 };

3238

3239

3241

3242 bool Valid = true;

3243 (void)Valid;

3244

3245

3246

3247

3248

3249 for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {

3250 SUnit *SU = NodeOrder[i];

3251 unsigned Index = i;

3252

3253 bool PredBefore = false;

3254 bool SuccBefore = false;

3255

3258 (void)Succ;

3259 (void)Pred;

3260

3261 for (const auto &IE : DDG->getInEdges(SU)) {

3262 SUnit *PredSU = IE.getSrc();

3263 unsigned PredIndex = std::get<1>(

3264 *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));

3265 if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {

3266 PredBefore = true;

3267 Pred = PredSU;

3268 break;

3269 }

3270 }

3271

3272 for (const auto &OE : DDG->getOutEdges(SU)) {

3273 SUnit *SuccSU = OE.getDst();

3274

3275

3276

3278 continue;

3279 unsigned SuccIndex = std::get<1>(

3280 *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));

3281 if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {

3282 SuccBefore = true;

3283 Succ = SuccSU;

3284 break;

3285 }

3286 }

3287

3288 if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {

3289

3290

3292 Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });

3293 if (InCircuit)

3295 else {

3296 Valid = false;

3297 NumNodeOrderIssues++;

3299 }

3301 << " are scheduled before node " << SU->NodeNum

3302 << "\n");

3303 }

3304 }

3305

3307 if (!Valid)

3308 dbgs() << "Invalid node order found!\n";

3309 });

3310}

3311

3312

3313

3314

3315

3316

3317

3319 unsigned OverlapReg = 0;

3320 unsigned NewBaseReg = 0;

3321 for (SUnit *SU : Instrs) {

3323 for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {

3325

3326

3327 if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {

3328

3329

3331 InstrChanges.find(SU);

3332 if (It != InstrChanges.end()) {

3333 unsigned BasePos, OffsetPos;

3334

3338 int64_t NewOffset =

3339 MI->getOperand(OffsetPos).getImm() - It->second.second;

3343 NewMIs[MI] = NewMI;

3344 }

3345 }

3346 OverlapReg = 0;

3347 NewBaseReg = 0;

3348 break;

3349 }

3350

3351

3352 unsigned TiedUseIdx = 0;

3353 if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {

3354

3355 OverlapReg = MI->getOperand(TiedUseIdx).getReg();

3356

3357 NewBaseReg = MI->getOperand(i).getReg();

3358 break;

3359 }

3360 }

3361 }

3362}

3363

3364std::deque<SUnit *>

3366 const std::deque<SUnit *> &Instrs) const {

3367 std::deque<SUnit *> NewOrderPhi;

3368 for (SUnit *SU : Instrs) {

3370 NewOrderPhi.push_back(SU);

3371 }

3372 std::deque<SUnit *> NewOrderI;

3373 for (SUnit *SU : Instrs) {

3376 }

3378 return NewOrderPhi;

3379}

3380

3381

3382

3383

3385

3387 for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;

3388 ++stage) {

3389 std::deque<SUnit *> &cycleInstrs =

3390 ScheduledInstrs[cycle + (stage * InitiationInterval)];

3392 ScheduledInstrs[cycle].push_front(SU);

3393 }

3394 }

3395

3396

3397

3398 for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)

3399 ScheduledInstrs.erase(cycle);

3400

3401

3402

3405

3406

3407

3409 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];

3412 }

3413

3415}

3416

3418 os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV

3419 << " depth " << MaxDepth << " col " << Colocate << "\n";

3420 for (const auto &I : Nodes)

3421 os << " SU(" << I->NodeNum << ") " << *(I->getInstr());

3422 os << "\n";

3423}

3424

3425#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

3426

3428

3430

3432 for (SUnit *CI : cycleInstrs->second) {

3433 os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";

3434 os << "(" << CI->NodeNum << ") ";

3436 os << "\n";

3437 }

3438 }

3439}

3440

3441

3444

3445void ResourceManager::dumpMRT() const {

3447 if (UseDFA)

3448 return;

3449 std::stringstream SS;

3450 SS << "MRT:\n";

3451 SS << std::setw(4) << "Slot";

3453 SS << std::setw(3) << I;

3454 SS << std::setw(7) << "#Mops"

3455 << "\n";

3456 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {

3457 SS << std::setw(4) << Slot;

3459 SS << std::setw(3) << MRT[Slot][I];

3460 SS << std::setw(7) << NumScheduledMops[Slot] << "\n";

3461 }

3462 dbgs() << SS.str();

3463 });

3464}

3465#endif

3466

3469 unsigned ProcResourceID = 0;

3470

3471

3472

3474 "Too many kinds of resources, unsupported");

3475

3476

3480 if (Desc.SubUnitsIdxBegin)

3481 continue;

3482 Masks[I] = 1ULL << ProcResourceID;

3483 ProcResourceID++;

3484 }

3485

3488 if (Desc.SubUnitsIdxBegin)

3489 continue;

3490 Masks[I] = 1ULL << ProcResourceID;

3491 for (unsigned U = 0; U < Desc.NumUnits; ++U)

3492 Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];

3493 ProcResourceID++;

3494 }

3497 dbgs() << "ProcResourceDesc:\n";

3500 dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",

3501 ProcResource->Name, I, Masks[I],

3503 }

3504 dbgs() << " -----------------\n";

3505 }

3506 });

3507}

3508

3512 dbgs() << "canReserveResources:\n";

3513 });

3514 if (UseDFA)

3515 return DFAResources[positiveModulo(Cycle, InitiationInterval)]

3517

3519 if (!SCDesc->isValid()) {

3521 dbgs() << "No valid Schedule Class Desc for schedClass!\n";

3523 });

3524 return true;

3525 }

3526

3527 reserveResources(SCDesc, Cycle);

3528 bool Result = !isOverbooked();

3529 unreserveResources(SCDesc, Cycle);

3530

3532 return Result;

3533}

3534

3535void ResourceManager::reserveResources(SUnit &SU, int Cycle) {

3538 dbgs() << "reserveResources:\n";

3539 });

3540 if (UseDFA)

3541 return DFAResources[positiveModulo(Cycle, InitiationInterval)]

3543

3545 if (!SCDesc->isValid()) {

3547 dbgs() << "No valid Schedule Class Desc for schedClass!\n";

3549 });

3550 return;

3551 }

3552

3553 reserveResources(SCDesc, Cycle);

3554

3557 dumpMRT();

3558 dbgs() << "reserveResources: done!\n\n";

3559 }

3560 });

3561}

3562

3563void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,

3568 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)

3569 ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];

3570

3572 ++NumScheduledMops[positiveModulo(C, InitiationInterval)];

3573}

3574

3575void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,

3580 for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)

3581 --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];

3582

3584 --NumScheduledMops[positiveModulo(C, InitiationInterval)];

3585}

3586

3587bool ResourceManager::isOverbooked() const {

3589 for (int Slot = 0; Slot < InitiationInterval; ++Slot) {

3592 if (MRT[Slot][I] > Desc->NumUnits)

3593 return true;

3594 }

3595 if (NumScheduledMops[Slot] > IssueWidth)

3596 return true;

3597 }

3598 return false;

3599}

3600

3601int ResourceManager::calculateResMIIDFA() const {

3603

3604

3605

3606 FuncUnitSorter FUS = FuncUnitSorter(*ST);

3608 FUS.calcCriticalResources(*SU.getInstr());

3610 FuncUnitOrder(FUS);

3611

3613 FuncUnitOrder.push(SU.getInstr());

3614

3618

3619 while (!FuncUnitOrder.empty()) {

3621 FuncUnitOrder.pop();

3623 continue;

3624

3625

3626

3628 unsigned ReservedCycles = 0;

3629 auto *RI = Resources.begin();

3630 auto *RE = Resources.end();

3632 dbgs() << "Trying to reserve resource for " << NumCycles

3633 << " cycles for \n";

3635 });

3636 for (unsigned C = 0; C < NumCycles; ++C)

3637 while (RI != RE) {

3638 if ((*RI)->canReserveResources(*MI)) {

3639 (*RI)->reserveResources(*MI);

3640 ++ReservedCycles;

3641 break;

3642 }

3643 RI++;

3644 }

3645 LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles

3646 << ", NumCycles:" << NumCycles << "\n");

3647

3648 for (unsigned C = ReservedCycles; C < NumCycles; ++C) {

3650 << "NewResource created to reserve resources"

3651 << "\n");

3653 assert(NewResource->canReserveResources(*MI) && "Reserve error.");

3654 NewResource->reserveResources(*MI);

3655 Resources.push_back(std::unique_ptr(NewResource));

3656 }

3657 }

3658

3659 int Resmii = Resources.size();

3660 LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");

3661 return Resmii;

3662}

3663

3665 if (UseDFA)

3666 return calculateResMIIDFA();

3667

3668

3669

3670

3671 int NumMops = 0;

3675 continue;

3676

3679 continue;

3680

3685 << " WriteProcRes: ";

3686 }

3687 });

3696 dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";

3697 }

3698 });

3699 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;

3700 }

3702 }

3703

3704 int Result = (NumMops + IssueWidth - 1) / IssueWidth;

3707 dbgs() << "#Mops: " << NumMops << ", "

3708 << "IssueWidth: " << IssueWidth << ", "

3709 << "Cycles: " << Result << "\n";

3710 });

3711

3714 std::stringstream SS;

3715 SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)

3716 << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"

3717 << "\n";

3718 dbgs() << SS.str();

3719 }

3720 });

3723 int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;

3726 std::stringstream SS;

3727 SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)

3728 << Desc->NumUnits << std::setw(10) << ResourceCount[I]

3729 << std::setw(10) << Cycles << "\n";

3730 dbgs() << SS.str();

3731 }

3732 });

3733 if (Cycles > Result)

3734 Result = Cycles;

3735 }

3736 return Result;

3737}

3738

3740 InitiationInterval = II;

3741 DFAResources.clear();

3742 DFAResources.resize(II);

3743 for (auto &I : DFAResources)

3744 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));

3747 NumScheduledMops.clear();

3748 NumScheduledMops.resize(II);

3749}

3750

3752 if (Pred.isArtificial() || Dst->isBoundaryNode())

3753 return true;

3754

3755

3756

3757 return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0);

3758}

3759

3760SwingSchedulerDDG::SwingSchedulerDDGEdges &

3761SwingSchedulerDDG::getEdges(const SUnit *SU) {

3762 if (SU == EntrySU)

3763 return EntrySUEdges;

3764 if (SU == ExitSU)

3765 return ExitSUEdges;

3766 return EdgesVec[SU->NodeNum];

3767}

3768

3769const SwingSchedulerDDG::SwingSchedulerDDGEdges &

3770SwingSchedulerDDG::getEdges(const SUnit *SU) const {

3771 if (SU == EntrySU)

3772 return EntrySUEdges;

3773 if (SU == ExitSU)

3774 return ExitSUEdges;

3775 return EdgesVec[SU->NodeNum];

3776}

3777

3778void SwingSchedulerDDG::addEdge(const SUnit *SU,

3780 auto &Edges = getEdges(SU);

3781 if (Edge.getSrc() == SU)

3782 Edges.Succs.push_back(Edge);

3783 else

3784 Edges.Preds.push_back(Edge);

3785}

3786

3787void SwingSchedulerDDG::initEdges(SUnit *SU) {

3788 for (const auto &PI : SU->Preds) {

3790 addEdge(SU, Edge);

3791 }

3792

3793 for (const auto &SI : SU->Succs) {

3795 addEdge(SU, Edge);

3796 }

3797}

3798

3801 : EntrySU(EntrySU), ExitSU(ExitSU) {

3802 EdgesVec.resize(SUnits.size());

3803

3804 initEdges(EntrySU);

3805 initEdges(ExitSU);

3806 for (auto &SU : SUnits)

3807 initEdges(&SU);

3808}

3809

3812 return getEdges(SU).Preds;

3813}

3814

3817 return getEdges(SU).Succs;

3818}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

MachineInstrBuilder MachineInstrBuilder & DefMI

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file contains the simple types necessary to represent the attributes associated with functions a...

This file implements the BitVector class.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

This file defines the DenseMap class.

SmallVector< uint32_t, 0 > Writes

const HexagonInstrInfo * TII

A common definition of LaneBitmask for use in TableGen and CodeGen.

static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))

A command line argument to force pipeliner to use specified initial interval.

static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))

static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))

static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))

static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))

static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)

Compute the live-out registers for the instructions in a node-set.

static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)

Create an instruction stream that represents a single iteration and stage of each instruction.

static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))

static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)

Return true if Set1 contains elements in Set2.

static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))

static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))

Modulo Software Pipelining

static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))

A command line option to disable the pruning of loop carried order dependences.

static bool isDependenceBarrier(MachineInstr &MI)

Return true if the instruction causes a chain between memory references before and after it.

static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))

A command line argument to limit minimum initial interval for pipelining.

static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)

Return the Phi register value that comes the loop block.

static bool isSuccOrder(SUnit *SUa, SUnit *SUb)

Return true if SUb can be reached from SUa following the chain edges.

static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))

A command line argument to limit the number of stages in the pipeline.

static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))

A command line option to enable SWP at -Os.

static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)

Compute the Pred_L(O) set, as defined in the paper.

static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))

static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))

static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)

Return true if there is a path from the specified node to any of the nodes in DestNodes.

static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)

Compute the Succ_L(O) set, as defined in the paper.

static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)

Return the register values for the operands of a Phi instruction.

static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))

static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))

A command line option to turn software pipelining on or off.

static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))

A command line option to disable the pruning of chain dependences due to an unrelated Phi.

static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)

If an instruction has a use that spans multiple iterations, then return true.

unsigned const TargetRegisterInfo * TRI

This file implements a map that provides insertion order iteration.

This file provides utility analysis objects describing memory locations.

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the PriorityQueue class.

Remove Loads Into Fake Uses

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file defines generic set operations that may be used on set's of different types,...

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

Target-Independent Code Generator Pass Configuration Options pass.

static unsigned getSize(unsigned Kind)

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)

A trivial helper function to check to see if the specified pointers are no-alias.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

LLVM Basic Block Representation.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool erase(const KeyT &Val)

Implements a dense probed hash-table based set.

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

A possibly irreducible generalization of a Loop.

Itinerary data supplied by a subtarget to be used by a target.

const InstrStage * beginStage(unsigned ItinClassIndx) const

Return the first stage of the itinerary.

const InstrStage * endStage(unsigned ItinClassIndx) const

Return the last+1 stage of the itinerary.

bool isEmpty() const

Returns true if there are no itineraries.

MDNode * getMetadata(unsigned KindID) const

Get the metadata of given kind attached to this Instruction.

TypeSize getValue() const

BlockT * getHeader() const

Represents a single loop in the control flow graph.

DebugLoc getStartLoc() const

Return the debug location of the start of this loop.

unsigned getSchedClass() const

Return the scheduling class for this instruction.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

Generic base class for all target subtargets.

const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const

const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const

Return an iterator at the first process resource consumed by the given scheduling class.

const MCSchedModel & getSchedModel() const

Get the machine model for this subtarget's CPU.

const MDOperand & getOperand(unsigned I) const

ArrayRef< MDOperand > operands() const

unsigned getNumOperands() const

Return number of MDNode operands.

Tracking metadata reference owned by Metadata.

StringRef getString() const

const BasicBlock * getBasicBlock() const

Return the LLVM basic block that this instance corresponded to originally.

iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

DebugLoc findDebugLoc(instr_iterator MBBI)

Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.

instr_iterator instr_end()

Analysis pass which computes a MachineDominatorTree.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

void deleteMachineInstr(MachineInstr *MI)

DeleteMachineInstr - Delete the given MachineInstr.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

MachineInstr * CloneMachineInstr(const MachineInstr *Orig)

Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

Representation of each machine instruction.

bool mayRaiseFPException() const

Return true if this instruction could possibly raise a floating-point exception.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

bool mayLoadOrStore(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly read or modify memory.

const MachineBasicBlock * getParent() const

const MCInstrDesc & getDesc() const

Returns the target instruction descriptor of this MachineInstr.

bool hasUnmodeledSideEffects() const

Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...

bool isRegSequence() const

mmo_iterator memoperands_begin() const

Access to memory operands of the instruction.

bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const

Return true if this instruction is identical to Other.

bool hasOrderedMemoryRef() const

Return true if this instruction may have an ordered or volatile memory reference, or if the informati...

void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const

Print this MI to OS.

bool isPseudo(QueryType Type=IgnoreBundle) const

Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.

const MachineOperand & getOperand(unsigned i) const

iterator_range< filtered_mop_iterator > all_defs()

Returns an iterator range over all operands that are (explicit or implicit) register defs.

A description of a memory reference used in the backend.

AAMDNodes getAAInfo() const

Return the AA tags for the memory reference.

const Value * getValue() const

Return the base address of the memory access.

int64_t getOffset() const

For normal values, this is a byte offset added to the base address.

MachineOperand class - Representation of each machine instruction operand.

void setSubReg(unsigned subReg)

unsigned getSubReg() const

void setImm(int64_t immVal)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

void setReg(Register Reg)

Change the register this operand corresponds to.

Register getReg() const

getReg - Returns the register number.

bool isIdenticalTo(const MachineOperand &Other) const

Returns true if this operand is identical to the specified operand except for liveness related flags ...

The main class in the implementation of the target independent software pipeliner pass.

const TargetInstrInfo * TII

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const MachineDominatorTree * MDT

const MachineLoopInfo * MLI

MachineOptimizationRemarkEmitter * ORE

RegisterClassInfo RegClassInfo

defusechain_iterator - This class provides iterator support for machine operands in the function that...

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

MachineInstr * getVRegDef(Register Reg) const

getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...

use_instr_iterator use_instr_begin(Register RegNo) const

static use_instr_iterator use_instr_end()

MachineInstr * getUniqueVRegDef(Register Reg) const

getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...

This class implements a map that also provides access to all stored values in a deterministic order.

iterator find(const KeyT &Key)

static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())

Return a location that may access any location after Ptr, while remaining within the underlying objec...

Expand the kernel using modulo variable expansion algorithm (MVE).

static bool canApply(MachineLoop &L)

Check if ModuloScheduleExpanderMVE can be applied to L.

The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...

void cleanup()

Performs final cleanup after expansion.

void expand()

Performs the actual expansion.

Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...

void annotate()

Performs the annotation.

Represents a schedule for a single-block loop.

A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...

SUnit * getNode(unsigned i) const

void print(raw_ostream &os) const

void setRecMII(unsigned mii)

unsigned count(SUnit *SU) const

void setColocate(unsigned c)

int compareRecMII(NodeSet &RHS)

LLVM_DUMP_METHOD void dump() const

Pass interface - Implemented by all 'passes'.

AnalysisType & getAnalysis() const

getAnalysis() - This function is used by subclasses to get to the analysis information ...

A reimplementation of ModuloScheduleExpander.

PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...

Track the current register pressure at some position in the instruction stream, and remember the high...

void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)

Force liveness of virtual registers or physical register units.

unsigned getRegPressureSetLimit(unsigned Idx) const

Get the register unit limit for the given pressure set index.

void runOnMachineFunction(const MachineFunction &MF)

runOnFunction - Prepare to answer questions about MF.

Wrapper class representing virtual and physical registers.

int calculateResMII() const

void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)

void init(int II)

Initialize resources with the initiation interval II.

bool canReserveResources(SUnit &SU, int Cycle)

Check if the resources occupied by a machine instruction are available in the current state.

Kind

These are the different kinds of scheduling dependencies.

@ Order

Any other ordering dependency.

@ Anti

A register anti-dependence (aka WAR).

@ Data

Regular data dependence (aka true-dependence).

@ Barrier

An unknown scheduling barrier.

@ Artificial

Arbitrary strong DAG edge (no real dependence).

This class represents the scheduled code.

std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const

void setInitiationInterval(int ii)

Set the initiation interval for this schedule.

SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)

Determine transitive dependences of unpipelineable instructions.

void dump() const

Utility function used for debugging to print the schedule.

bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)

Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...

int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)

Return the cycle of the earliest scheduled instruction in the dependence chain.

unsigned getMaxStageCount()

Return the maximum stage count needed for this schedule.

void print(raw_ostream &os) const

Print the schedule information to the given output.

bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const

Return true if all scheduled predecessors are loop-carried output/order dependencies.

int stageScheduled(SUnit *SU) const

Return the stage for a scheduled instruction.

void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const

Order the instructions within a cycle so that the definitions occur before the uses.

bool isValidSchedule(SwingSchedulerDAG *SSD)

int getInitiationInterval() const

Return the initiation interval for this schedule.

std::deque< SUnit * > & getInstructions(int cycle)

Return the instructions that are scheduled at the specified cycle.

int getFirstCycle() const

Return the first cycle in the completed schedule.

bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const

Return true if the instruction is a definition that is loop carried and defines the use on the next i...

unsigned cycleScheduled(SUnit *SU) const

Return the cycle for a scheduled instruction.

void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)

Compute the scheduling start slot for the instruction.

bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)

bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const

Return true if the scheduled Phi has a loop carried operand.

int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)

Return the cycle of the latest scheduled instruction in the dependence chain.

int getFinalCycle() const

Return the last cycle in the finalized schedule.

void finalizeSchedule(SwingSchedulerDAG *SSD)

After the schedule has been formed, call this function to combine the instructions from the different...

Scheduling unit. This is a node in the scheduling DAG.

bool isInstr() const

Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.

unsigned NodeNum

Entry # of node in the node vector.

void setInstr(MachineInstr *MI)

Assigns the instruction for the SUnit.

void removePred(const SDep &D)

Removes the specified edge as a pred of the current node if it exists.

bool isPred(const SUnit *N) const

Tests if node N is a predecessor of this node.

unsigned short Latency

Node latency.

bool isBoundaryNode() const

Boundary nodes are placeholders for the boundary of the scheduling region.

bool hasPhysRegDefs

Has physreg defs that are being used.

SmallVector< SDep, 4 > Succs

All sunit successors.

SmallVector< SDep, 4 > Preds

All sunit predecessors.

bool addPred(const SDep &D, bool Required=true)

Adds the specified edge as a pred of the current node if not already.

MachineInstr * getInstr() const

Returns the representative MachineInstr for this SUnit.

A ScheduleDAG for scheduling lists of MachineInstr.

DenseMap< MachineInstr *, SUnit * > MISUnitMap

After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...

virtual void finishBlock()

Cleans up after scheduling in the given block.

MachineBasicBlock * BB

The block in which to insert instructions.

const MCSchedClassDesc * getSchedClass(SUnit *SU) const

Resolves and cache a resolved scheduling class for an SUnit.

void dumpNode(const SUnit &SU) const override

UndefValue * UnknownValue

For an unanalyzable memory access, this Value is used in maps.

void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)

Builds SUnits for the current region.

SUnit * getSUnit(MachineInstr *MI) const

Returns an existing SUnit for this MI, or nullptr.

TargetSchedModel SchedModel

TargetSchedModel provides an interface to the machine model.

void dump() const override

void RemovePred(SUnit *M, SUnit *N)

Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...

void InitDAGTopologicalSorting()

Creates the initial topological ordering from the DAG to be scheduled.

void AddPred(SUnit *Y, SUnit *X)

Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.

bool IsReachable(const SUnit *SU, const SUnit *TargetSU)

Checks if SU is reachable from TargetSU.

MachineRegisterInfo & MRI

Virtual/real register map.

const TargetInstrInfo * TII

Target instruction information.

std::vector< SUnit > SUnits

The scheduling units.

const TargetRegisterInfo * TRI

Target processor register info.

SUnit EntrySU

Special node for the region entry.

MachineFunction & MF

Machine function.

SUnit ExitSU

Special node for the region exit.

A vector that has set insertion semantics.

size_type size() const

Determine the number of elements in the SetVector.

typename vector_type::const_iterator iterator

void clear()

Completely clear the SetVector.

size_type count(const key_type &key) const

Count the number of elements of a given key in the SetVector.

bool empty() const

Determine if the SetVector is empty or not.

bool insert(const value_type &X)

Insert a new element into the SetVector.

bool contains(const key_type &key) const

Check if the SetVector contains the given key.

SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)

Insert the given machine instruction into the mapping.

Implements a dense probed hash-table based set with some number of buckets stored inline.

bool erase(PtrType Ptr)

Remove pointer from the set.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

bool contains(const T &V) const

Check if the SmallSet contains the given element.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...

unsigned getInstrBaseReg(SUnit *SU) const

Return the new base register that was stored away for the changed instruction.

unsigned getDepth(SUnit *Node)

The depth, in the dependence graph, for a node.

int getASAP(SUnit *Node)

Return the earliest time an instruction may be scheduled.

void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)

Apply changes to the instruction if needed.

const SwingSchedulerDDG * getDDG() const

void finishBlock() override

Clean up after the software pipeliner runs.

void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)

Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...

int getZeroLatencyDepth(SUnit *Node)

The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...

bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const

Return true for an order or output dependence that is loop carried potentially.

void schedule() override

We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...

int getMOV(SUnit *Node)

The mobility function, which the number of slots in which an instruction may be scheduled.

int getZeroLatencyHeight(SUnit *Node)

The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...

unsigned getHeight(SUnit *Node)

The height, in the dependence graph, for a node.

int getALAP(SUnit *Node)

Return the latest time an instruction my be scheduled.

Represents a dependence between two instruction.

SUnit * getDst() const

Returns the SUnit to which the edge points (destination node).

bool isArtificial() const

Returns true if the edge represents an artificial dependence.

bool ignoreDependence(bool IgnoreAnti) const

Returns true for DDG nodes that we ignore when computing the cost functions.

bool isOrderDep() const

Returns true if the edge represents a dependence that is not data, anti or output dependence.

SUnit * getSrc() const

Returns the SUnit from which the edge comes (source node).

bool isOutputDep() const

Returns true if the edge represents output dependence.

Represents dependencies between instructions.

SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU)

const EdgesType & getInEdges(const SUnit *SU) const

const EdgesType & getOutEdges(const SUnit *SU) const

Object returned by analyzeLoopForPipelining.

virtual bool isMVEExpanderSupported()

Return true if the target can expand pipelined schedule with modulo variable expansion.

virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0

Return true if the given instruction should not be pipelined and should be ignored.

virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)

Return true if the proposed schedule should used.

virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const

Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...

bool isZeroCost(unsigned Opcode) const

Return true for pseudo instructions that don't consume any machine resources in their current form.

virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const

Create machine specific model for scheduling.

virtual bool isPostIncrement(const MachineInstr &MI) const

Return true for post-incremented instructions.

virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const

Return true if the instruction contains a base register and offset.

virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const

Sometimes, it is possible for the target to tell, even without aliasing information,...

virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const

If the instruction is an increment of a constant value, return the amount.

bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const

Get the base operand and byte offset of an instruction that reads/writes memory.

Target-Independent Code Generator Pass Configuration Options.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

virtual const char * getRegPressureSetName(unsigned Idx) const =0

Get the name of this register unit pressure set.

TargetSubtargetInfo - Generic base class for all target subtargets.

virtual const TargetRegisterInfo * getRegisterInfo() const

getRegisterInfo - If register information is available, return it.

static Type * getVoidTy(LLVMContext &C)

static UndefValue * get(Type *T)

Static factory methods - Return an 'undef' object of the specified type.

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

The main class in the implementation of the target independent window scheduler.

unsigned getPosition() const

std::pair< iterator, bool > insert(const ValueT &V)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

This class implements an extremely fast bulk output stream that can only output to a stream.

This provides a very simple, boring adaptor for a begin and end iterator into a range type.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ C

The default llvm calling convention, compatible with C.

Reg

All possible values of the reg field in the ModR/M byte.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

DiagnosticInfoOptimizationBase::Argument NV

NodeAddr< PhiNode * > Phi

NodeAddr< DefNode * > Def

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void stable_sort(R &&Range)

int popcount(T Value) noexcept

Count the number of set bits in a value.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

bool set_is_subset(const S1Ty &S1, const S2Ty &S2)

set_is_subset(A, B) - Return true iff A in B

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

void erase(Container &C, ValueType V)

Wrapper function to remove a value from a container:

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

@ WS_Force

Use window algorithm after SMS algorithm fails.

@ WS_On

Turn off window algorithm.

void sort(IteratorTy Start, IteratorTy End)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

unsigned getRegState(const MachineOperand &RegOp)

Get all register state flags from machine operand RegOp.

auto lower_bound(R &&Range, T &&Value)

Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...

char & MachinePipelinerID

This pass performs software pipelining on machine instructions.

cl::opt< bool > SwpEnableCopyToPhi

void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=6)

This method is similar to getUnderlyingObject except that it can look through phi and select instruct...

cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))

A command line argument to set the window scheduling option.

bool isIdentifiedObject(const Value *V)

Return true if this pointer refers to a distinct and identifiable object.

Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

cl::opt< int > SwpForceIssueWidth

A command line argument to force pipeliner to use specified issue width.

Description of the encoding of one expression Op.

These values represent a non-pipelined step in the execution of an instruction.

RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.

static constexpr LaneBitmask getNone()

Define a kind of processor resource that will be modeled by the scheduler.

Summarize the scheduling resources required for an instruction of a particular scheduling class.

Machine model for scheduling, bundling, and heuristics.

const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const

unsigned getNumProcResourceKinds() const

bool hasInstrSchedModel() const

Does this machine model include instruction-level scheduling.

const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const

Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...

MachineInstr * LoopInductionVar

SmallVector< MachineOperand, 4 > BrCond

MachineInstr * LoopCompare

std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo

MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...

const TargetPassConfig * PassConfig

const MachineDominatorTree * MDT

RegisterClassInfo * RegClassInfo

const MachineLoopInfo * MLI

Store the effects of a change in pressure on things that MI scheduler cares about.

std::vector< unsigned > MaxSetPressure

Map of max reg pressure indexed by pressure set ID, not class ID.