LLVM: lib/CodeGen/WindowScheduler.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

49

50using namespace llvm;

51

52#define DEBUG_TYPE "pipeliner"

53

54namespace {

56 "Number of loops that we attempt to use window scheduling");

58 "Number of times that we run list schedule in the window scheduling");

60 "Number of loops that we successfully use window scheduling");

62 "Window scheduling abort due to the failure of the II analysis");

63

65 WindowSearchNum("window-search-num",

66 cl::desc("The number of searches per loop in the window "

67 "algorithm. 0 means no search number limit."),

69

71 "window-search-ratio",

72 cl::desc("The ratio of searches per loop in the window algorithm. 100 "

73 "means search all positions in the loop, while 0 means not "

74 "performing any search."),

76

78 "window-ii-coeff",

80 "The coefficient used when initializing II in the window algorithm."),

82

84 "window-region-limit",

86 "The lower limit of the scheduling region in the window algorithm."),

88

90 "window-diff-limit",

91 cl::desc("The lower limit of the difference between best II and base II in "

92 "the window algorithm. If the difference is smaller than "

93 "this lower limit, window scheduling will not be performed."),

95}

96

97

98

101 cl::desc("The upper limit of II in the window algorithm."),

103

108 TripleDAG = std::unique_ptr(

110}

111

114 LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");

115 return false;

116 }

117

118

120 ++NumTryWindowSchedule;

121

123

125 auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio);

126 for (unsigned Idx : SearchIndexes) {

128 ++NumTryWindowSearch;

129

130

133 SchedDAG->startBlock(MBB);

135 SchedDAG->schedule();

140 LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");

141 ++NumFailAnalyseII;

142 continue;

143 }

148 << II << ".\n");

149 }

150

152

154 LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");

155 return false;

156 }

158 << " and Best II is " << BestII << ".\n");

159

161 ++NumWindowSchedule;

162 return true;

163}

164

167 return OnlyBuildGraph

169 Context, std::make_unique(Context),

170 true)

172}

173

175 if (Subtarget->enableWindowScheduler()) {

176 LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");

177 return false;

178 }

179

190

192 LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");

193 return false;

194 }

195

198 auto IsLoopCarried = [&](MachineInstr &Phi) {

199

200

201

202 if (PrevUses.count(Phi.getOperand(0).getReg()))

203 return true;

204 PrevDefs.insert(Phi.getOperand(0).getReg());

205 for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {

206 if (PrevDefs.count(Phi.getOperand(I).getReg()))

207 return true;

208 PrevUses.insert(Phi.getOperand(I).getReg());

209 }

210 return false;

211 };

212 auto PLI = TII->analyzeLoopForPipelining(MBB);

213 for (auto &MI : *MBB) {

214 if (MI.isMetaInstruction() || MI.isTerminator())

215 continue;

216 if (MI.isPHI()) {

217 if (IsLoopCarried(MI)) {

218 LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");

219 return false;

220 }

223 } else

225 if (TII->isSchedulingBoundary(MI, MBB, *MF)) {

227 dbgs() << "Boundary MI is not allowed in window scheduling!\n");

228 return false;

229 }

230 if (PLI->shouldIgnoreForPipelining(&MI)) {

231 LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "

232 "window scheduling!\n");

233 return false;

234 }

235 for (auto &Def : MI.all_defs())

236 if (Def.isReg() && Def.getReg().isPhysical()) {

237 LLVM_DEBUG(dbgs() << "Physical registers are not supported in "

238 "window scheduling!\n");

239 return false;

240 }

241 }

243 LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");

244 return false;

245 }

246 return true;

247}

248

250

251

256 MBB, MBB->begin(), MBB->getFirstTerminator(),

257 std::distance(MBB->begin(), MBB->getFirstTerminator()));

259}

260

268

270 for (auto &MI : MBB->instrs())

272

274 Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);

275 MBB->remove(&MI);

276 }

277}

278

280

282 Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);

283 MI.eraseFromParent();

284 }

285

288}

289

291 const unsigned DuplicateNum = 3;

294 assert(OriMIs.size() > 0 && "The Original MIs were not backed up!");

295

296

297

300 if (MI->isMetaInstruction() || MI->isTerminator())

301 continue;

302 if (MI->isPHI())

304 DefPairs[MI->getOperand(0).getReg()] = AntiReg;

305 auto *NewMI = MF->CloneMachineInstr(MI);

306 MBB->push_back(NewMI);

307 TriMIs.push_back(NewMI);

309 }

310

311

312

313 for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {

315 if (MI->isPHI() || MI->isMetaInstruction() ||

316 (MI->isTerminator() && Cnt < DuplicateNum - 1))

317 continue;

318 auto *NewMI = MF->CloneMachineInstr(MI);

320

321 for (auto MO : NewMI->all_defs())

322 if (MO.isReg() && MO.getReg().isVirtual()) {

324 MRI->createVirtualRegister(MRI->getRegClass(MO.getReg()));

325 NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI);

326 NewDefs[MO.getReg()] = NewDef;

327 }

328

329 for (auto DefRegPair : DefPairs)

330 if (NewMI->readsRegister(DefRegPair.first, TRI)) {

331 Register NewUse = DefRegPair.second;

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359 if (auto It = DefPairs.find(NewUse); It != DefPairs.end())

360 NewUse = It->second;

361 NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI);

362 }

363

364 for (auto &NewDef : NewDefs)

365 DefPairs[NewDef.first] = NewDef.second;

366 MBB->push_back(NewMI);

367 TriMIs.push_back(NewMI);

369 }

370 }

371

372

373

374

375

376

377 for (auto &Phi : MBB->phis()) {

378 for (auto DefRegPair : DefPairs)

379 if (Phi.readsRegister(DefRegPair.first, TRI))

380 Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI);

381 }

383}

384

386

387 for (size_t I = 0; I < TriMIs.size(); ++I) {

389 auto OldPos = MBB->begin();

390 std::advance(OldPos, I);

391 auto CurPos = MI->getIterator();

392 if (CurPos != OldPos) {

393 MBB->splice(OldPos, MBB, CurPos);

394 Context->LIS->handleMove(*MI, false);

395 }

396 }

397}

398

400 unsigned SearchRatio) {

401

402

403

404

405 assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!");

406 unsigned MaxIdx = SchedInstrNum * SearchRatio / 100;

407 unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;

409 for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step)

411 return SearchIndexes;

412}

413

415

416 unsigned MaxDepth = 1;

417 for (auto &SU : DAG.SUnits)

418 MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth);

419 return MaxDepth * WindowIICoeff;

420}

421

426 RM.init(InitII);

427

428

429

430 int CurCycle = 0;

434 int ExpectCycle = CurCycle;

435

436 for (auto &Pred : SU->Preds) {

437 if (Pred.isWeak())

438 continue;

439 auto *PredMI = Pred.getSUnit()->getInstr();

441 ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());

442 }

443

444 if (TII->isZeroCost(MI.getOpcode())) {

445

446

447 while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {

448 ++CurCycle;

450 return CurCycle;

451 }

452 RM.reserveResources(*SU, CurCycle);

453 }

455 LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."

457 }

458 LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n");

459 return CurCycle;

460}

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

493 int MaxStallCycle = 0;

494 int CurrentII = MaxCycle + 1;

499 for (auto &Succ : SU->Succs) {

500 if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)

501 continue;

502

503 if (DefCycle + (int)Succ.getLatency() <= CurrentII)

504 continue;

505

506

507

508 auto *SuccMI = Succ.getSUnit()->getInstr();

510 if (DefCycle < UseCycle)

512

513 int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;

514 MaxStallCycle = std::max(MaxStallCycle, StallCycle);

515 }

516 }

517 LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n");

518 return MaxStallCycle;

519}

520

525 return MaxCycle;

528 return StallCycle;

529

530 return MaxCycle + StallCycle + 1;

531}

532

535 for (auto &Phi : MBB->phis()) {

536 int LateCycle = INT_MAX;

537 auto *SU = TripleDAG->getSUnit(&Phi);

538 for (auto &Succ : SU->Succs) {

539

541 continue;

542

543

544 auto *SuccMI = Succ.getSUnit()->getInstr();

547 LateCycle = std::min(LateCycle, Cycle);

548 }

549

551 auto *AntiMI = MRI->getVRegDef(AntiReg);

552

553 if (AntiMI->getParent() == MBB) {

556 LateCycle = std::min(LateCycle, AntiCycle);

557 }

558 }

559

560 if (LateCycle == INT_MAX)

561 LateCycle = (int)(II - 1);

562 LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi);

563

564 auto *OriPhi = getOriMI(&Phi);

566 }

567}

568

570 unsigned II) {

571

572

575 for (auto &Phi : MBB->phis())

579

580

582 int Id = 0;

584 auto It = CycleToMIs.find(Cycle);

585 if (It == CycleToMIs.end())

586 continue;

587 for (auto *MI : It->second)

588 IssueOrder[MI] = Id++;

589 }

590 return IssueOrder;

591}

592

594

595

600 return;

601 }

602

603

605 return;

608

609

613 assert(IssueOrder.count(Pair.first) && "Cannot find original MI!");

614 SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,

616 IssueOrder[Pair.first]));

617 }

618}

619

621

623 [](const std::tuple<MachineInstr *, int, int, int> &A,

624 const std::tuple<MachineInstr *, int, int, int> &B) {

625 return std::get<3>(A) < std::get<3>(B);

626 });

627

628

630 std::vector<MachineInstr *> OrderedInsts;

632 auto *MI = std::get<0>(Info);

633 OrderedInsts.push_back(MI);

634 Cycles[MI] = std::get<1>(Info);

635 Stages[MI] = std::get<2>(Info);

636 LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI]

637 << "]: " << *MI);

638 }

640 std::move(Stages));

645}

646

651 if (!MO.isReg() || MO.getReg() == 0)

652 continue;

656 }

657 Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs);

658}

659

662 auto RegionBegin = MBB->begin();

663 std::advance(RegionBegin, Offset);

664 auto RegionEnd = RegionBegin;

665 std::advance(RegionEnd, Num);

666 return make_range(RegionBegin, RegionEnd);

667}

668

675

680

683

685 return 0;

686

687

688 unsigned Id = 0;

690 if (MI->isMetaInstruction())

691 continue;

692 if (MI == OriMI)

693 break;

694 ++Id;

695 }

696 return Id >= (size_t)Offset ? 1 : 0;

697}

698

700 assert(Phi->isPHI() && "Expecting PHI!");

702 for (auto MO : Phi->uses()) {

703 if (MO.isReg())

704 AntiReg = MO.getReg();

705 else if (MO.isMBB() && MO.getMBB() == MBB)

706 return AntiReg;

707 }

708 return 0;

709}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

uint64_t IntrinsicInst * II

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

Target-Independent Code Generator Pass Configuration Options pass.

static cl::opt< unsigned > WindowIILimit("window-ii-limit", cl::desc("The upper limit of II in the window algorithm."), cl::Hidden, cl::init(1000))

iterator find(const_arg_type_t< KeyT > Val)

Representation of each machine instruction.

MachineOperand class - Representation of each machine instruction operand.

The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...

void cleanup()

Performs final cleanup after expansion.

void expand()

Performs the actual expansion.

DenseMap< MachineInstr *, std::pair< Register, int64_t > > InstrChangesTy

Represents a schedule for a single-block loop.

Wrapper class representing virtual and physical registers.

@ Data

Regular data dependence (aka true-dependence).

A ScheduleDAG for scheduling lists of MachineInstr.

SUnit * getSUnit(MachineInstr *MI) const

Returns an existing SUnit for this MI, or nullptr.

ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...

std::vector< SUnit > SUnits

The scheduling units.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.

MachineInstr * getOriMI(MachineInstr *NewMI)

Get the original MI from which the new MI is cloned.

Definition WindowScheduler.cpp:676

virtual ScheduleDAGInstrs * createMachineScheduler(bool OnlyBuildGraph=false)

Two types of ScheduleDAGs are needed, one for creating dependency graphs only, and the other for list...

Definition WindowScheduler.cpp:166

unsigned SchedPhiNum

SchedPhiNum records the number of phi in the original MBB, and the scheduling starts with MI after ph...

virtual void preProcess()

Add some related processing before running window scheduling.

Definition WindowScheduler.cpp:249

MachineSchedContext * Context

bool run()

Definition WindowScheduler.cpp:112

virtual void restoreTripleMBB()

Restore the order of MIs in TripleMBB after each list scheduling.

Definition WindowScheduler.cpp:385

virtual void postProcess()

Add some related processing after running window scheduling.

Definition WindowScheduler.cpp:261

DenseMap< MachineInstr *, int > OriToCycle

OriToCycle keeps the mappings between the original MI and its issue cycle.

virtual int calculateMaxCycle(ScheduleDAGInstrs &DAG, unsigned Offset)

Calculate MIs execution cycle after list scheduling.

Definition WindowScheduler.cpp:422

MachineRegisterInfo * MRI

unsigned BestII

BestII and BestOffset record the characteristics of the best scheduling result and are used together ...

void backupMBB()

Back up the MIs in the original MBB and remove them from MBB.

Definition WindowScheduler.cpp:269

DenseMap< MachineInstr *, MachineInstr * > TriToOri

TriToOri keeps the mappings between the MI clones in TripleMBB and their original MI.

int getEstimatedII(ScheduleDAGInstrs &DAG)

Estimate a II value at which all MIs will be scheduled successfully.

Definition WindowScheduler.cpp:414

virtual void expand()

Using the scheduling infrastructure to expand the results of window scheduling.

Definition WindowScheduler.cpp:620

DenseMap< MachineInstr *, int > getIssueOrder(unsigned Offset, unsigned II)

Get the final issue order of all scheduled MIs including phis.

Definition WindowScheduler.cpp:569

Register getAntiRegister(MachineInstr *Phi)

Gets the register in phi which is generated from the current MBB.

Definition WindowScheduler.cpp:699

unsigned getOriStage(MachineInstr *OriMI, unsigned Offset)

Get the scheduling stage, where the stage of the new MI is identical to the original MI.

Definition WindowScheduler.cpp:681

const TargetRegisterInfo * TRI

virtual void updateLiveIntervals()

Update the live intervals for all registers used within MBB.

Definition WindowScheduler.cpp:647

const TargetSubtargetInfo * Subtarget

std::unique_ptr< ScheduleDAGInstrs > TripleDAG

To innovatively identify the dependencies between MIs across two trips, we construct a DAG for a new ...

unsigned BaseII

BaseII is the II obtained when the window offset is SchedPhiNum.

virtual void schedulePhi(int Offset, unsigned &II)

Phis are scheduled separately after each list scheduling.

Definition WindowScheduler.cpp:533

virtual unsigned analyseII(ScheduleDAGInstrs &DAG, unsigned Offset)

Analyzes the II value after each list scheduling.

Definition WindowScheduler.cpp:521

int getOriCycle(MachineInstr *NewMI)

Get the issue cycle of the new MI based on the cycle of the original MI.

Definition WindowScheduler.cpp:669

unsigned SchedInstrNum

SchedInstrNum records the MIs involved in scheduling in the original MBB, excluding debug instruction...

const TargetInstrInfo * TII

virtual void generateTripleMBB()

Make three copies of the original MBB to generate a new TripleMBB.

Definition WindowScheduler.cpp:290

iterator_range< MachineBasicBlock::iterator > getScheduleRange(unsigned Offset, unsigned Num)

Gets the iterator range of MIs in the scheduling window.

Definition WindowScheduler.cpp:661

virtual bool isScheduleValid()

Check whether the final result of window scheduling is valid.

virtual int calculateStallCycle(unsigned Offset, int MaxCycle)

Calculate the stall cycle between two trips after list scheduling.

Definition WindowScheduler.cpp:492

virtual void updateScheduleResult(unsigned Offset, unsigned II)

Update the scheduling result after each list scheduling.

Definition WindowScheduler.cpp:593

SmallVector< MachineInstr * > OriMIs

OriMIs keeps the MIs removed from the original MBB.

virtual bool initialize()

Initializes the algorithm and determines if it can be executed.

Definition WindowScheduler.cpp:174

SmallVector< std::tuple< MachineInstr *, int, int, int >, 256 > SchedResult

SchedResult keeps the result of each list scheduling, and the format of the tuple is <MI pointer,...

virtual SmallVector< unsigned > getSearchIndexes(unsigned SearchNum, unsigned SearchRatio)

Give the folding position in the window algorithm, where different heuristics can be used.

Definition WindowScheduler.cpp:399

void restoreMBB()

Erase the MIs in current MBB and restore the original MIs.

Definition WindowScheduler.cpp:279

WindowScheduler(MachineSchedContext *C, MachineLoop &ML)

Definition WindowScheduler.cpp:104

SmallVector< MachineInstr * > TriMIs

TriMIs keeps the MIs of TripleMBB, which is used to restore TripleMBB.

A range adaptor for a pair of iterators.

@ C

The default llvm calling convention, compatible with C.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

void stable_sort(R &&Range)

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...