LLVM: lib/CodeGen/PostRASchedulerList.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

35#include "llvm/Config/llvm-config.h"

42using namespace llvm;

43

44#define DEBUG_TYPE "post-RA-sched"

45

46STATISTIC(NumNoops, "Number of noops inserted");

47STATISTIC(NumStalls, "Number of pipeline stalls");

48STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");

49

50

51

52

55 cl::desc("Enable scheduling after register allocation"),

59 cl::desc("Break post-RA scheduling anti-dependencies: "

60 "\"critical\", \"all\", or \"none\""),

62

63

66 cl::desc("Debug control MBBs that are scheduled"),

70 cl::desc("Debug control MBBs that are scheduled"),

72

74

75namespace {

79

80 public:

81 static char ID;

83

93 }

94

97 MachineFunctionProperties::Property::NoVRegs);

98 }

99

101 };

102 char PostRAScheduler::ID = 0;

103

105

106

108

109

110

111

112

113 std::vector<SUnit*> PendingQueue;

114

115

117

118

120

121

123

124

125 std::vector<SUnit*> Sequence;

126

127

128 std::vector<std::unique_ptr> Mutations;

129

130

131

132

133

134 unsigned EndIndex = 0;

135

136 public:

137 SchedulePostRATDList(

142

143 ~SchedulePostRATDList() override;

144

145

146

147

149

150

151 void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }

152

153

157 unsigned regioninstrs) override;

158

159

161

162

163

165

166 void EmitSchedule();

167

168

169

170

172

173

174

176

177 private:

178

179 void postProcessDAG();

180

181 void ReleaseSucc(SUnit *SU, SDep *SuccEdge);

182 void ReleaseSuccessors(SUnit *SU);

183 void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);

184 void ListScheduleTopDown();

185

186 void dumpSchedule() const;

187 void emitNoop(unsigned CurCycle);

188 };

189}

190

192

194 "Post RA top-down list latency scheduler", false, false)

195

196SchedulePostRATDList::SchedulePostRATDList(

202

204 MF.getSubtarget().getInstrItineraryData();

205 HazardRec =

206 MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(

207 InstrItins, this);

208 MF.getSubtarget().getPostRAMutations(Mutations);

209

210 assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||

211 MRI.tracksLiveness()) &&

212 "Live-ins must be accurate for anti-dependency breaking");

213 AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL)

215 : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL)

217 : nullptr));

218}

219

220SchedulePostRATDList::~SchedulePostRATDList() {

221 delete HazardRec;

222 delete AntiDepBreak;

223}

224

225

229 unsigned regioninstrs) {

232}

233

234

235void SchedulePostRATDList::exitRegion() {

237 dbgs() << "*** Final schedule ***\n";

238 dumpSchedule();

239 dbgs() << '\n';

240 });

242}

243

244#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

245

246LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {

247 for (const SUnit *SU : Sequence) {

248 if (SU)

249 dumpNode(*SU);

250 else

251 dbgs() << "**** NOOP ****\n";

252 }

253}

254#endif

255

258

261

262 return ST.enablePostRAScheduler() &&

263 OptLevel >= ST.getOptLevelToEnablePostRAScheduler();

264}

265

266bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {

268 return false;

269

271 TargetPassConfig *PassConfig = &getAnalysis();

272

274 return false;

275

276 TII = Subtarget.getInstrInfo();

277 MachineLoopInfo &MLI = getAnalysis().getLI();

278 AliasAnalysis *AA = &getAnalysis().getAAResults();

280 Subtarget.getAntiDepBreakMode();

283 ? TargetSubtargetInfo::ANTIDEP_ALL

285 ? TargetSubtargetInfo::ANTIDEP_CRITICAL

286 : TargetSubtargetInfo::ANTIDEP_NONE);

287 }

289 Subtarget.getCriticalPathRCs(CriticalPathRCs);

291

293

294 SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,

295 CriticalPathRCs);

296

297

298 for (auto &MBB : Fn) {

299#ifndef NDEBUG

300

302 static int bbcnt = 0;

304 continue;

305 dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"

307 }

308#endif

309

310

312

313

314

316 unsigned Count = MBB.size(), CurrentCount = Count;

319 --Count;

320

321

322

324 Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);

325 Scheduler.setEndIndex(CurrentCount);

329 Current = &MI;

330 CurrentCount = Count;

332 }

334 if (MI.isBundle())

335 Count -= MI.getBundleSize();

336 }

337 assert(Count == 0 && "Instruction count mismatch!");

338 assert((MBB.begin() == Current || CurrentCount != 0) &&

339 "Instruction count mismatch!");

341 Scheduler.setEndIndex(CurrentCount);

345

346

348

349

351 }

352

353 return true;

354}

355

356

357

358

360

362

363

364 HazardRec->Reset();

365 if (AntiDepBreak)

366 AntiDepBreak->StartBlock(BB);

367}

368

369

370

371void SchedulePostRATDList::schedule() {

372

373 buildSchedGraph(AA);

374

375 if (AntiDepBreak) {

376 unsigned Broken =

377 AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,

378 EndIndex, DbgValues);

379

380 if (Broken != 0) {

381

382

383

384

385

386

388 buildSchedGraph(AA);

389

390 NumFixedAnti += Broken;

391 }

392 }

393

394 postProcessDAG();

395

396 LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");

398

399 AvailableQueue.initNodes(SUnits);

400 ListScheduleTopDown();

401 AvailableQueue.releaseState();

402}

403

404

405

406

407void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) {

408 if (AntiDepBreak)

409 AntiDepBreak->Observe(MI, Count, EndIndex);

410}

411

412

413

414void SchedulePostRATDList::finishBlock() {

415 if (AntiDepBreak)

416 AntiDepBreak->FinishBlock();

417

418

420}

421

422

423void SchedulePostRATDList::postProcessDAG() {

424 for (auto &M : Mutations)

425 M->apply(this);

426}

427

428

429

430

431

432

433

434void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {

436

437 if (SuccEdge->isWeak()) {

439 return;

440 }

441#ifndef NDEBUG

443 dbgs() << "*** Scheduling failed! ***\n";

444 dumpNode(*SuccSU);

445 dbgs() << " has been released too many times!\n";

447 }

448#endif

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)

465 PendingQueue.push_back(SuccSU);

466}

467

468

469void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {

471 I != E; ++I) {

472 ReleaseSucc(SU, &*I);

473 }

474}

475

476

477

478

479void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {

480 LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");

482

485 "Node scheduled above its depth!");

487

488 ReleaseSuccessors(SU);

490 AvailableQueue.scheduledNode(SU);

491}

492

493

494void SchedulePostRATDList::emitNoop(unsigned CurCycle) {

495 LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');

496 HazardRec->EmitNoop();

497 Sequence.push_back(nullptr);

498 ++NumNoops;

499}

500

501

502

503void SchedulePostRATDList::ListScheduleTopDown() {

504 unsigned CurCycle = 0;

505

506

507

508

509

510 HazardRec->Reset();

511

512

513 ReleaseSuccessors(&EntrySU);

514

515

517

519 AvailableQueue.push(&SUnit);

521 }

522 }

523

524

525

526 bool CycleHasInsts = false;

527

528

529

530 std::vector<SUnit*> NotReady;

531 Sequence.reserve(SUnits.size());

532 while (!AvailableQueue.empty() || !PendingQueue.empty()) {

533

534

535 unsigned MinDepth = ~0u;

536 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {

537 if (PendingQueue[i]->getDepth() <= CurCycle) {

538 AvailableQueue.push(PendingQueue[i]);

539 PendingQueue[i]->isAvailable = true;

540 PendingQueue[i] = PendingQueue.back();

541 PendingQueue.pop_back();

542 --i; --e;

543 } else if (PendingQueue[i]->getDepth() < MinDepth)

544 MinDepth = PendingQueue[i]->getDepth();

545 }

546

548 AvailableQueue.dump(this));

549

550 SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;

551 bool HasNoopHazards = false;

552 while (!AvailableQueue.empty()) {

553 SUnit *CurSUnit = AvailableQueue.pop();

554

556 HazardRec->getHazardType(CurSUnit, 0);

558 if (HazardRec->ShouldPreferAnother(CurSUnit)) {

559 if (!NotPreferredSUnit) {

560

561

562

563

564 NotPreferredSUnit = CurSUnit;

565 continue;

566 }

567 } else {

568 FoundSUnit = CurSUnit;

569 break;

570 }

571 }

572

573

575

576 NotReady.push_back(CurSUnit);

577 }

578

579

580

581

582 if (NotPreferredSUnit) {

583 if (!FoundSUnit) {

585 dbgs() << "*** Will schedule a non-preferred instruction...\n");

586 FoundSUnit = NotPreferredSUnit;

587 } else {

588 AvailableQueue.push(NotPreferredSUnit);

589 }

590

591 NotPreferredSUnit = nullptr;

592 }

593

594

595 if (!NotReady.empty()) {

596 AvailableQueue.push_all(NotReady);

597 NotReady.clear();

598 }

599

600

601 if (FoundSUnit) {

602

603 unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);

604 for (unsigned i = 0; i != NumPreNoops; ++i)

605 emitNoop(CurCycle);

606

607

608 ScheduleNodeTopDown(FoundSUnit, CurCycle);

609 HazardRec->EmitInstruction(FoundSUnit);

610 CycleHasInsts = true;

611 if (HazardRec->atIssueLimit()) {

612 LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle

613 << '\n');

614 HazardRec->AdvanceCycle();

615 ++CurCycle;

616 CycleHasInsts = false;

617 }

618 } else {

619 if (CycleHasInsts) {

620 LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');

621 HazardRec->AdvanceCycle();

622 } else if (!HasNoopHazards) {

623

624

625 LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');

626 HazardRec->AdvanceCycle();

627 ++NumStalls;

628 } else {

629

630

631

632 emitNoop(CurCycle);

633 }

634

635 ++CurCycle;

636 CycleHasInsts = false;

637 }

638 }

639

640#ifndef NDEBUG

641 unsigned ScheduledNodes = VerifyScheduledDAG(false);

642 unsigned Noops = llvm::count(Sequence, nullptr);

644 "The number of nodes scheduled doesn't match the expected number!");

645#endif

646}

647

648

649void SchedulePostRATDList::EmitSchedule() {

650 RegionBegin = RegionEnd;

651

652

653 if (FirstDbgValue)

654 BB->splice(RegionEnd, BB, FirstDbgValue);

655

656

657 for (unsigned i = 0, e = Sequence.size(); i != e; i++) {

658 if (SUnit *SU = Sequence[i])

660 else

661

663

664

665

666 if (i == 0)

667 RegionBegin = std::prev(RegionEnd);

668 }

669

670

671 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator

672 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {

673 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);

677 }

678 DbgValues.clear();

679 FirstDbgValue = nullptr;

680}

unsigned const MachineRegisterInfo * MRI

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

const HexagonInstrInfo * TII

Machine Instruction Scheduler

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

static cl::opt< int > DebugDiv("postra-sched-debugdiv", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden)

static cl::opt< bool > EnablePostRAScheduler("post-RA-scheduler", cl::desc("Enable scheduling after register allocation"), cl::init(false), cl::Hidden)

static cl::opt< std::string > EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies: " "\"critical\", \"all\", or \"none\""), cl::init("none"), cl::Hidden)

static bool enablePostRAScheduler(const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel)

static cl::opt< int > DebugMod("postra-sched-debugmod", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

Target-Independent Code Generator Pass Configuration Options pass.

Class recording the (high level) value of a variable.

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

void setPreservesCFG()

This function should be called by the pass, iff they do not:

This class works in conjunction with the post-RA scheduler to rename registers to break register anti...

virtual ~AntiDepBreaker()

void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override

Insert a noop into the instruction stream at the specified point.

bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override

Test if the given instruction should be considered a scheduling boundary.

Itinerary data supplied by a subtarget to be used by a target.

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...

Analysis pass which computes a MachineDominatorTree.

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

virtual MachineFunctionProperties getRequiredProperties() const

Properties which a MachineFunction may have at a given point in time.

MachineFunctionProperties & set(Property P)

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

Representation of each machine instruction.

void runOnMachineFunction(const MachineFunction &MF)

runOnFunction - Prepare to answer questions about MF.

bool isWeak() const

Tests if this a weak dependence.

Scheduling unit. This is a node in the scheduling DAG.

unsigned getDepth() const

Returns the depth of this node, which is the length of the maximum path up to any node which has no p...

bool isScheduled

True once scheduled.

bool isAvailable

True once available.

SmallVector< SDep, 4 > Succs

All sunit successors.

SmallVectorImpl< SDep >::iterator succ_iterator

void setDepthToAtLeast(unsigned NewDepth)

If NewDepth is greater than this node's depth value, sets it to be the new depth value.

MachineInstr * getInstr() const

Returns the representative MachineInstr for this SUnit.

A ScheduleDAG for scheduling lists of MachineInstr.

virtual void finishBlock()

Cleans up after scheduling in the given block.

virtual void startBlock(MachineBasicBlock *BB)

Prepares to perform scheduling in the given block.

virtual void exitRegion()

Called when the scheduler has finished scheduling the current region.

virtual void schedule()=0

Orders nodes according to selected style.

virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)

Initialize the DAG and common scheduler state for a new scheduling region.

void clearDAG()

Clears the DAG state (between regions).

HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

TargetInstrInfo - Interface to description of machine instruction set.

Target-Independent Code Generator Pass Configuration Options.

CodeGenOptLevel getOptLevel() const

TargetSubtargetInfo - Generic base class for all target subtargets.

enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode

unsigned getPosition() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

Sequence

A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...

This is an optimization pass for GlobalISel generic memory operations.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

AntiDepBreaker * createAggressiveAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector &CriticalPathRCs)

char & PostRASchedulerID

PostRAScheduler - This pass performs post register allocation scheduling.

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

CodeGenOptLevel

Code generation optimization level.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

AntiDepBreaker * createCriticalAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI)

Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.