LLVM: include/llvm/CodeGen/MachinePipeliner.h Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H

41#define LLVM_CODEGEN_MACHINEPIPELINER_H

42

55

56#include

57

58namespace llvm {

59

60class AAResults;

62class SMSchedule;

63

66

67

68

70public:

80

81#ifndef NDEBUG

83#endif

84

85

93 nullptr;

94 };

96

97 static char ID;

98

101 }

102

104

106

107private:

111 bool swingModuloScheduler(MachineLoop &L);

112 void setPragmaPipelineOptions(MachineLoop &L);

114 bool useSwingModuloScheduler();

115 bool useWindowScheduler(bool Changed);

116};

117

118

120 SUnit *Dst = nullptr;

122 unsigned Distance = 0;

123

124public:

125

126

127

128

130 : Dst(PredOrSucc), Pred(Dep), Distance(0u) {

132

133 if (IsSucc) {

136 }

137

138

140 Distance = 1;

144 }

145 }

146

147

149

150

152

153

155

156

158

159

161

162

164

165

167

168

170

171

173

174

175

177

178

180

181

183

184

186

187

188

189

191};

192

193

194

195

196

197

200

201 struct SwingSchedulerDDGEdges {

204 };

205

206 void initEdges(SUnit *SU);

207

210

211 std::vector EdgesVec;

212 SwingSchedulerDDGEdges EntrySUEdges;

213 SwingSchedulerDDGEdges ExitSUEdges;

214

216

217 SwingSchedulerDDGEdges &getEdges(const SUnit *SU);

218 const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;

219

220public:

222

224

226};

227

228

229

232

233 std::unique_ptr DDG;

234

235

236 unsigned MII = 0;

237

238 unsigned MAX_II = 0;

239

240 bool Scheduled = false;

244 unsigned II_setByPragma = 0;

246

247

248

250

251 struct NodeInfo {

252 int ASAP = 0;

253 int ALAP = 0;

254 int ZeroLatencyDepth = 0;

255 int ZeroLatencyHeight = 0;

256

257 NodeInfo() = default;

258 };

259

260 std::vector ScheduleInfo;

261

262 enum OrderKind { BottomUp = 0, TopDown = 1 };

263

265

270

271

273

274

275

277

278

279 std::vector<std::unique_ptr> Mutations;

280

281

282 class Circuits {

283 std::vector &SUnits;

288

289 std::vector *Node2Idx;

290 unsigned NumPaths = 0u;

291 static unsigned MaxPaths;

292

293 public:

295 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {

296 Node2Idx = new std::vector(SUs.size());

297 unsigned Idx = 0;

298 for (const auto &NodeNum : Topo)

299 Node2Idx->at(NodeNum) = Idx++;

300 }

301 Circuits &operator=(const Circuits &other) = delete;

302 Circuits(const Circuits &other) = delete;

303 ~Circuits() { delete Node2Idx; }

304

305

306 void reset() {

307 Stack.clear();

310 NumPaths = 0;

311 }

312

314 bool circuit(int V, int S, NodeSetType &NodeSets,

316 void unblock(int U);

317 };

318

321 };

322

323public:

328 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),

330 P.MF->getSubtarget().getSMSMutations(Mutations);

332 Mutations.push_back(std::make_unique());

333 }

334

337

338

340

341

343

344

346

347

348

350

351

353

354

355

357 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;

358 }

359

360

362

363

364

366 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;

367 }

368

370

372

374

375

376

379 InstrChanges.find(SU);

380 if (It != InstrChanges.end())

381 return It->second.first;

382 return 0;

383 }

384

386 Mutations.push_back(std::move(Mutation));

387 }

388

390

392

393private:

394 void addLoopCarriedDependences(AAResults *AA);

395 void updatePhiDependences();

396 void changeDependences();

397 unsigned calculateResMII();

398 unsigned calculateRecMII(NodeSetType &RecNodeSets);

399 void findCircuits(NodeSetType &NodeSets);

400 void fuseRecs(NodeSetType &NodeSets);

401 void removeDuplicateNodes(NodeSetType &NodeSets);

402 void computeNodeFunctions(NodeSetType &NodeSets);

403 void registerPressureFilter(NodeSetType &NodeSets);

404 void colocateNodeSets(NodeSetType &NodeSets);

405 void checkNodeSets(NodeSetType &NodeSets);

406 void groupRemainingNodes(NodeSetType &NodeSets);

407 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,

409 void computeNodeOrder(NodeSetType &NodeSets);

410 void checkValidNodeOrder(const NodeSetType &Circuits) const;

411 bool schedulePipeline(SMSchedule &Schedule);

412 bool computeDelta(MachineInstr &MI, unsigned &Delta) const;

414 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,

415 unsigned &OffsetPos, unsigned &NewBase,

416 int64_t &NewOffset);

417 void postProcessDAG();

418

419 void setMII(unsigned ResMII, unsigned RecMII);

420

421 void setMAX_II();

422};

423

424

425

428 bool HasRecurrence = false;

429 unsigned RecMII = 0;

430 int MaxMOV = 0;

431 unsigned MaxDepth = 0;

432 unsigned Colocate = 0;

433 SUnit *ExceedPressure = nullptr;

434 unsigned Latency = 0;

435

436public:

438

441 : Nodes(S, E), HasRecurrence(true) {

442

443

444

445

446

447

448

449

450

451

452

453

454

455

458 for (auto *Node : Nodes)

459 SUnitToDistance[Node] = 0;

460

461 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {

462 SUnit *U = Nodes[I - 1];

463 SUnit *V = Nodes[I % Nodes.size()];

465 SUnit *SuccSUnit = Succ.getDst();

466 if (V != SuccSUnit)

467 continue;

468 if (SUnitToDistance[U] + Succ.getLatency() > SUnitToDistance[V]) {

469 SUnitToDistance[V] = SUnitToDistance[U] + Succ.getLatency();

470 }

471 }

472 }

473

474 SUnit *FirstNode = Nodes[0];

475 SUnit *LastNode = Nodes[Nodes.size() - 1];

476

477 for (auto &PI : DDG->getInEdges(LastNode)) {

478

479

480

481

482 if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||

484 continue;

485 SUnitToDistance[FirstNode] =

486 std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1);

487 }

488

489

490 Latency = SUnitToDistance[Nodes.front()];

491 }

492

494

496

497 template bool remove_if(UnaryPredicate P) {

499 }

500

502

504

505 unsigned size() const { return Nodes.size(); }

506

508

510

511 void setRecMII(unsigned mii) { RecMII = mii; };

512

514

516

518

520

522

523

525 for (SUnit *SU : *this) {

526 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));

527 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));

528 }

529 }

530

532

534

537 RecMII = 0;

538 HasRecurrence = false;

539 MaxMOV = 0;

540 MaxDepth = 0;

541 Colocate = 0;

542 ExceedPressure = nullptr;

543 }

544

546

547

548

549

550

552 if (RecMII == RHS.RecMII) {

553 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)

554 return Colocate < RHS.Colocate;

555 if (MaxMOV == RHS.MaxMOV)

556 return MaxDepth > RHS.MaxDepth;

557 return MaxMOV < RHS.MaxMOV;

558 }

559 return RecMII > RHS.RecMII;

560 }

561

563 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&

564 MaxDepth == RHS.MaxDepth;

565 }

566

568

572

573#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

575#endif

576};

577

578

579

581

583private:

589 const bool UseDFA;

590

592

593

595

596

597

599

600

601

602

604 int InitiationInterval = 0;

605

606 int IssueWidth;

607

608 int calculateResMIIDFA() const;

609

610 bool isOverbooked() const;

611

613

615

616

617

618

619 int positiveModulo(int Dividend, int Divisor) const {

621 int R = Dividend % Divisor;

622 if (R < 0)

623 R += Divisor;

624 return R;

625 }

626

627#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

629#endif

630

631public:

633 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),

634 DAG(DAG), UseDFA(ST->useDFAforSMS()),

635 ProcResourceMasks(SM.getNumProcResourceKinds(), 0),

636 IssueWidth(SM.IssueWidth) {

638 if (IssueWidth <= 0)

639

640 IssueWidth = 100;

643 }

644

647

648

649

651

652

653

654 void reserveResources(SUnit &SU, int Cycle);

655

657

658

660};

661

662

663

664

665

666

667

668

669

671private:

672

674

675

676 std::map<SUnit *, int> InstrToCycle;

677

678

679

680 int FirstCycle = 0;

681

682

683 int LastCycle = 0;

684

685

686 int InitiationInterval = 0;

687

688

690

691

693

695

696public:

698 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),

699 ProcItinResources(&ST, DAG) {}

700

702 ScheduledInstrs.clear();

703 InstrToCycle.clear();

704 FirstCycle = 0;

705 LastCycle = 0;

706 InitiationInterval = 0;

707 }

708

709

711 InitiationInterval = ii;

712 ProcItinResources.init(ii);

713 }

714

715

717

718

719

721

722

723 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }

724

725

726

729

730

731

734

735 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,

737 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);

738

739

743

744

747 }

748

749

750

752 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);

753 if (it == InstrToCycle.end())

754 return -1;

755 return (it->second - FirstCycle) / InitiationInterval;

756 }

757

758

759

761 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);

762 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");

763 return (it->second - FirstCycle) % InitiationInterval;

764 }

765

766

768 return (LastCycle - FirstCycle) / InitiationInterval;

769 }

770

771

773 return ScheduledInstrs[cycle];

774 }

775

779

780 std::deque<SUnit *>

782 const std::deque<SUnit *> &Instrs) const;

783

784 bool

790 std::deque<SUnit *> &Insts) const;

794

798 void dump() const;

799};

800

801}

802

803#endif

unsigned const MachineRegisterInfo * MRI

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

const HexagonInstrInfo * TII

uint64_t IntrinsicInst * II

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

Represent the analysis usage information of a pass.

iterator find(const_arg_type_t< KeyT > Val)

A possibly irreducible generalization of a Loop.

Itinerary data supplied by a subtarget to be used by a target.

Represents a single loop in the control flow graph.

Generic base class for all target subtargets.

DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

Representation of each machine instruction.

MachineOperand class - Representation of each machine instruction operand.

The main class in the implementation of the target independent software pipeliner pass.

const TargetInstrInfo * TII

bool runOnMachineFunction(MachineFunction &MF) override

The "main" function for implementing Swing Modulo Scheduling.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const MachineDominatorTree * MDT

const MachineLoopInfo * MLI

MachineOptimizationRemarkEmitter * ORE

RegisterClassInfo RegClassInfo

const InstrItineraryData * InstrItins

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...

SUnit * getNode(unsigned i) const

SetVector< SUnit * >::const_iterator iterator

void print(raw_ostream &os) const

bool isExceedSU(SUnit *SU)

void insert(iterator S, iterator E)

void setRecMII(unsigned mii)

void computeNodeSetInfo(SwingSchedulerDAG *SSD)

Summarize node functions for the entire node set.

unsigned count(SUnit *SU) const

void setColocate(unsigned c)

NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)

bool operator>(const NodeSet &RHS) const

Sort the node sets by importance.

int compareRecMII(NodeSet &RHS)

bool operator!=(const NodeSet &RHS) const

LLVM_DUMP_METHOD void dump() const

bool operator==(const NodeSet &RHS) const

bool remove_if(UnaryPredicate P)

void setExceedPressure(SUnit *SU)

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

Wrapper class representing virtual and physical registers.

int calculateResMII() const

void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)

ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)

void init(int II)

Initialize resources with the initiation interval II.

bool canReserveResources(SUnit &SU, int Cycle)

Check if the resources occupied by a machine instruction are available in the current state.

Kind getKind() const

Returns an enum value representing the kind of the dependence.

@ Output

A register output-dependence (aka WAW).

@ Order

Any other ordering dependency.

@ Anti

A register anti-dependence (aka WAR).

@ Data

Regular data dependence (aka true-dependence).

void setLatency(unsigned Lat)

Sets the latency for this edge.

unsigned getLatency() const

Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...

bool isAssignedRegDep() const

Tests if this is a Data dependence that is associated with a register.

bool isArtificial() const

Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...

unsigned getReg() const

Returns the register associated with this edge.

bool isBarrier() const

Tests if this is an Order dependence that is marked as a barrier.

This class represents the scheduled code.

std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const

void setInitiationInterval(int ii)

Set the initiation interval for this schedule.

SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)

Determine transitive dependences of unpipelineable instructions.

void dump() const

Utility function used for debugging to print the schedule.

bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)

Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...

int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)

Return the cycle of the earliest scheduled instruction in the dependence chain.

unsigned getMaxStageCount()

Return the maximum stage count needed for this schedule.

void print(raw_ostream &os) const

Print the schedule information to the given output.

bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const

Return true if all scheduled predecessors are loop-carried output/order dependencies.

int stageScheduled(SUnit *SU) const

Return the stage for a scheduled instruction.

bool isScheduledAtStage(SUnit *SU, unsigned StageNum)

Return true if the instruction is scheduled at the specified stage.

void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const

Order the instructions within a cycle so that the definitions occur before the uses.

bool isValidSchedule(SwingSchedulerDAG *SSD)

int getInitiationInterval() const

Return the initiation interval for this schedule.

std::deque< SUnit * > & getInstructions(int cycle)

Return the instructions that are scheduled at the specified cycle.

int getFirstCycle() const

Return the first cycle in the completed schedule.

bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const

Return true if the instruction is a definition that is loop carried and defines the use on the next i...

unsigned cycleScheduled(SUnit *SU) const

Return the cycle for a scheduled instruction.

void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)

Compute the scheduling start slot for the instruction.

SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)

bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)

bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const

Return true if the scheduled Phi has a loop carried operand.

int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)

Return the cycle of the latest scheduled instruction in the dependence chain.

int getFinalCycle() const

Return the last cycle in the finalized schedule.

void finalizeSchedule(SwingSchedulerDAG *SSD)

After the schedule has been formed, call this function to combine the instructions from the different...

Scheduling unit. This is a node in the scheduling DAG.

A ScheduleDAG for scheduling lists of MachineInstr.

const MachineLoopInfo * MLI

Mutate the DAG as a postpass after normal DAG building.

This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...

std::vector< SUnit > SUnits

The scheduling units.

MachineFunction & MF

Machine function.

SUnit ExitSU

Special node for the region exit.

A vector that has set insertion semantics.

bool remove_if(UnaryPredicate P)

Remove items from the set vector based on a predicate function.

size_type size() const

Determine the number of elements in the SetVector.

iterator end()

Get an iterator to the end of the SetVector.

typename vector_type::const_iterator const_iterator

void clear()

Completely clear the SetVector.

size_type count(const key_type &key) const

Count the number of elements of a given key in the SetVector.

bool empty() const

Determine if the SetVector is empty or not.

iterator begin()

Get an iterator to the beginning of the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...

unsigned getInstrBaseReg(SUnit *SU) const

Return the new base register that was stored away for the changed instruction.

unsigned getDepth(SUnit *Node)

The depth, in the dependence graph, for a node.

int getASAP(SUnit *Node)

Return the earliest time an instruction may be scheduled.

void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)

Apply changes to the instruction if needed.

const SwingSchedulerDDG * getDDG() const

void finishBlock() override

Clean up after the software pipeliner runs.

void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)

Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...

bool hasNewSchedule()

Return true if the loop kernel has been scheduled.

void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)

SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci, unsigned II, TargetInstrInfo::PipelinerLoopInfo *PLI)

int getZeroLatencyDepth(SUnit *Node)

The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...

bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const

Return true for an order or output dependence that is loop carried potentially.

void schedule() override

We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...

int getMOV(SUnit *Node)

The mobility function, which the number of slots in which an instruction may be scheduled.

int getZeroLatencyHeight(SUnit *Node)

The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...

static bool classof(const ScheduleDAGInstrs *DAG)

unsigned getHeight(SUnit *Node)

The height, in the dependence graph, for a node.

int getALAP(SUnit *Node)

Return the latest time an instruction my be scheduled.

Represents a dependence between two instruction.

SUnit * getDst() const

Returns the SUnit to which the edge points (destination node).

Register getReg() const

Returns the register associated with the edge.

void setDistance(unsigned D)

Sets the distance value for the edge.

bool isBarrier() const

Returns true if the edge represents unknown scheduling barrier.

void setLatency(unsigned Latency)

Sets the latency for the edge.

bool isAntiDep() const

Returns true if the edge represents anti dependence.

bool isAssignedRegDep() const

Tests if this is a Data dependence that is associated with a register.

bool isArtificial() const

Returns true if the edge represents an artificial dependence.

bool ignoreDependence(bool IgnoreAnti) const

Returns true for DDG nodes that we ignore when computing the cost functions.

bool isOrderDep() const

Returns true if the edge represents a dependence that is not data, anti or output dependence.

unsigned getLatency() const

Returns the latency value for the edge.

SUnit * getSrc() const

Returns the SUnit from which the edge comes (source node).

unsigned getDistance() const

Returns the distance value for the edge.

bool isOutputDep() const

Returns true if the edge represents output dependence.

SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc)

Creates an edge corresponding to an edge represented by PredOrSucc and Dep in the original DAG.

Represents dependencies between instructions.

const EdgesType & getInEdges(const SUnit *SU) const

const EdgesType & getOutEdges(const SUnit *SU) const

Object returned by analyzeLoopForPipelining.

TargetInstrInfo - Interface to description of machine instruction set.

TargetSubtargetInfo - Generic base class for all target subtargets.

This class implements an extremely fast bulk output stream that can only output to a stream.

std::set< NodeId > NodeSet

This is an optimization pass for GlobalISel generic memory operations.

void initializeMachinePipelinerPass(PassRegistry &)

cl::opt< bool > SwpEnableCopyToPhi

static const int DefaultProcResSize

cl::opt< int > SwpForceIssueWidth

A command line argument to force pipeliner to use specified issue width.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

Summarize the scheduling resources required for an instruction of a particular scheduling class.

Machine model for scheduling, bundling, and heuristics.

Cache the target analysis information about the loop.

MachineInstr * LoopInductionVar

SmallVector< MachineOperand, 4 > BrCond

MachineInstr * LoopCompare

std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo