LLVM: lib/Transforms/Vectorize/LoopVectorizationPlanner.h Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H

25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H

26

30

31namespace {

32class GeneratedRTChecks;

33}

34

35namespace llvm {

36

49

52

53

57

58

59 template <typename T> T *tryInsertInstruction(T *R) {

60 if (BB)

61 BB->insert(R, InsertPt);

62 return R;

63 }

64

68 const Twine &Name = "") {

69 return tryInsertInstruction(

71 }

72

73public:

80

81

82

87

90

91

94 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));

95 return B;

96 }

97

98

102

103 public:

104

106

107

109 : Block(InsertBlock), Point(InsertPoint) {}

110

111

112 bool isSet() const { return Block != nullptr; }

113

116 };

117

118

125

126

127

129 assert(TheBB && "Attempting to set a null insert point");

130 BB = TheBB;

131 InsertPt = BB->end();

132 }

133

134

135

137 BB = TheBB;

138 InsertPt = IP;

139 }

140

141

142

147

148

150

151

152

158 const Twine &Name = "") {

159 VPInstruction *NewVPInst = tryInsertInstruction(

160 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));

161 NewVPInst->setUnderlyingValue(Inst);

162 return NewVPInst;

163 }

166 return createInstruction(Opcode, Operands, {}, DL, Name);

167 }

171 const Twine &Name = "") {

172 return tryInsertInstruction(

173 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));

174 }

175

179 const Twine &Name = "") {

181 Opcode, Operands, ResultTy, Flags, {}, DL, Name));

182 }

183

188 return tryInsertInstruction(

189 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));

190 }

191

194 const Twine &Name = "") {

196 }

197

200 const Twine &Name = "") {

201 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,

202 Name);

203 }

204

207 const Twine &Name = "") {

208

210 Instruction::BinaryOps::Or, {LHS, RHS},

212 }

213

216 const Twine &Name = "") {

218 }

219

223 std::optional<FastMathFlags> FMFs = std::nullopt) {

224 if (!FMFs)

225 return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,

226 Name);

228 Instruction::Select, {Cond, TrueVal, FalseVal}, *FMFs, {}, DL, Name));

229 }

230

231

232

235 const Twine &Name = "") {

238 return tryInsertInstruction(

239 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));

240 }

241

242

243

246 const Twine &Name = "") {

249 return tryInsertInstruction(

250 new VPInstruction(Instruction::FCmp, {A, B}, Pred, {}, DL, Name));

251 }

252

255 const Twine &Name = "") {

256 return tryInsertInstruction(

259 }

260

264 const Twine &Name = "") {

267 }

268

271 const Twine &Name = "") {

272 return tryInsertInstruction(

275 }

276

278 const Twine &Name = "") {

279 return tryInsertInstruction(new VPPhi(IncomingValues, DL, Name));

280 }

281

285 if (EC.isScalable()) {

287 RuntimeEC = EC.getKnownMinValue() == 1

288 ? VScale

290 {VScale, RuntimeEC}, {true, false});

291 }

292 return RuntimeEC;

293 }

294

295

296

297

301 const Twine &Name = "") {

302 return tryInsertInstruction(

303 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));

304 }

305

310 return tryInsertInstruction(

311 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));

312 }

313

316 if (ResultTy == SrcTy)

317 return Op;

320 ? Instruction::Trunc

321 : Instruction::ZExt;

323 }

324

326 Type *ResultTy) {

328 if (Opcode == Instruction::Trunc)

330 else if (Opcode == Instruction::ZExt)

332 return tryInsertInstruction(

334 }

335

341 IV, Step, VF, InductionOpcode,

343 }

344

348

349

350

351

352

353

354

359

360 public:

363

366

368 };

369};

370

371

372

373

374

375

376

378

380

381

383

384

386

387

388

390

394

395

399

403

405 return !(*this == rhs);

406 }

407};

408

409

410

411

412

413

417

428 "Invalid scalable properties");

429 }

430

432

433

435

436

438};

439

440

441

443

444 Loop *OrigLoop;

445

446

448

449

451

452

454

455

457

458

460

461

463

464

466

468

470

472

474

475

477

478

480

481

482

483

484

485

486

487

488

490

491

492

493

496

497public:

504 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),

505 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}

506

507

508

509

511

512

513

515

516

517

519

520

521

523

524

525

526

527

530

531

532

533

534

535

536

537

538

539

540

545 bool VectorizingEpilogue);

546

547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

549#endif

550

551

552

554 return any_of(VPlans,

555 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });

556 }

557

558

559

560

561 static bool

564

565

566

567

570

571

573

574

575

578

579

580

581

582

583

584

585

586

589 bool VectorizingEpilogue, MDNode *OrigLoopID,

590 std::optional<unsigned> OrigAverageTripCount,

591 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,

592 bool DisableRuntimeUnroll);

593

594protected:

595

596

597

599

600private:

601

602

603

604

605

607

608

609

610

611

612

613

614

617

618

619

620

622

623

624

625

626

627

628 void adjustRecipesForReductions(VPlanPtr &Plan,

631

632

633 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,

634 bool HasBranchWeights) const;

635

636#ifndef NDEBUG

637

638

639

640

641

643#endif

644

645

646

649 bool IsEpilogue = false) const;

650

651

652

655 const unsigned MaxTripCount, bool HasTail,

656 bool IsEpilogue = false) const;

657

658

659

660 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;

661};

662

663}

664

665#endif

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

This file defines an InstructionCost class that is used when calculating the cost of an instruction,...

ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

const SmallVectorImpl< MachineOperand > & Cond

This file defines the SmallSet class.

This file contains the declarations of the Vectorization Plan base classes:

static const uint32_t IV[8]

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

static DebugLoc getUnknown()

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

static constexpr ElementCount getFixed(ScalarTy MinVal)

Utility class for floating point operations which can have information about relaxed accuracy require...

FastMathFlags getFastMathFlags() const

Convenience function for getting all the fast-math flags.

Convenience struct for specifying and reasoning about fast-math flags.

Represents flags for the getelementptr instruction/expression.

static GEPNoWrapFlags none()

InductionKind

This enum represents the kinds of inductions that we support.

InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...

Drive the analysis of interleaved memory accesses in the loop.

LoopVectorizationCostModel - estimates the expected speedups due to vectorization.

LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...

VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC)

VPlan & getPlanFor(ElementCount VF) const

Return the VPlan for VF.

LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)

Definition LoopVectorizationPlanner.h:498

VectorizationFactor planInVPlanNativePath(ElementCount UserVF)

Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.

void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)

Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...

void buildVPlans(ElementCount MinVF, ElementCount MaxVF)

Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...

VectorizationFactor computeBestVF()

Compute and return the most profitable vectorization factor.

DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue)

Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...

unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)

void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)

Emit remarks for recipes with invalid costs in the available VPlans.

static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)

Test a Predicate on a Range of VF's.

void printPlans(raw_ostream &O)

void plan(ElementCount UserVF, unsigned UserIC)

Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...

void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const

Create a check to Plan to see if the vector loop should be executed based on its trip count.

bool hasPlanWithVF(ElementCount VF) const

Look through the existing plans and return true if we have one with vectorization factor VF.

Definition LoopVectorizationPlanner.h:553

Utility class for getting and setting loop vectorizer hints in the form of loop metadata.

This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...

Represents a single loop in the control flow graph.

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

This class represents an analyzed expression in the program.

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Provides information about what library functions are available for the current target.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.

RecipeListTy::iterator iterator

Instruction iterators...

InsertPointGuard(const InsertPointGuard &)=delete

InsertPointGuard(VPBuilder &B)

Definition LoopVectorizationPlanner.h:361

InsertPointGuard & operator=(const InsertPointGuard &)=delete

~InsertPointGuard()

Definition LoopVectorizationPlanner.h:367

InsertPoint - A saved insertion point.

Definition LoopVectorizationPlanner.h:99

VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)

Creates a new insertion point at the given location.

Definition LoopVectorizationPlanner.h:108

VPBasicBlock * getBlock() const

Definition LoopVectorizationPlanner.h:114

VPBasicBlock::iterator getPoint() const

Definition LoopVectorizationPlanner.h:115

VPInsertPoint()=default

Creates a new insertion point which doesn't point to anything.

bool isSet() const

Returns true if this insert point is set.

Definition LoopVectorizationPlanner.h:112

VPlan-based builder utility analogous to IRBuilder.

Definition LoopVectorizationPlanner.h:54

VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:205

VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)

Definition LoopVectorizationPlanner.h:314

void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)

This specifies that created instructions should be inserted at the specified point.

Definition LoopVectorizationPlanner.h:136

void setInsertPoint(VPRecipeBase *IP)

This specifies that created instructions should be inserted at the specified point.

Definition LoopVectorizationPlanner.h:143

VPValue * createElementCount(Type *Ty, ElementCount EC)

Definition LoopVectorizationPlanner.h:282

void restoreIP(VPInsertPoint IP)

Sets the current insert point to a previously-saved location.

Definition LoopVectorizationPlanner.h:119

VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})

Definition LoopVectorizationPlanner.h:306

VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:192

VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:214

VPBasicBlock * getInsertBlock() const

Definition LoopVectorizationPlanner.h:88

VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")

Convert the input value Current to the corresponding value of an induction with Start and Step values...

Definition LoopVectorizationPlanner.h:298

void insert(VPRecipeBase *R)

Insert R at the current insertion point.

Definition LoopVectorizationPlanner.h:149

VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)

Definition LoopVectorizationPlanner.h:221

VPBasicBlock::iterator getInsertPoint() const

Definition LoopVectorizationPlanner.h:89

VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)

Definition LoopVectorizationPlanner.h:337

VPBuilder(VPBasicBlock *InsertBB)

Definition LoopVectorizationPlanner.h:75

VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:261

VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Create a new FCmp VPInstruction with predicate Pred and operands A and B.

Definition LoopVectorizationPlanner.h:244

VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:253

static VPBuilder getToInsertAfter(VPRecipeBase *R)

Create a VPBuilder to insert after R.

Definition LoopVectorizationPlanner.h:92

VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")

Definition LoopVectorizationPlanner.h:164

VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:184

VPBuilder(VPRecipeBase *InsertPt)

Definition LoopVectorizationPlanner.h:76

VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")

Definition LoopVectorizationPlanner.h:277

VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)

Definition LoopVectorizationPlanner.h:325

VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Create a new ICmp VPInstruction with predicate Pred and operands A and B.

Definition LoopVectorizationPlanner.h:233

void clearInsertionPoint()

Clear the insertion point: created instructions will not be inserted into a block.

Definition LoopVectorizationPlanner.h:83

VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:198

VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:176

VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:168

VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)

Definition LoopVectorizationPlanner.h:345

VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)

Definition LoopVectorizationPlanner.h:77

VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Definition LoopVectorizationPlanner.h:269

void setInsertPoint(VPBasicBlock *TheBB)

This specifies that created VPInstructions should be appended to the end of the specified block.

Definition LoopVectorizationPlanner.h:128

VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.

Definition LoopVectorizationPlanner.h:153

A recipe for converting the input value IV value to the corresponding value of an IV with different s...

Recipe to expand a SCEV expression.

Class to record and manage LLVM IR flags.

A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...

This is a concrete Recipe that models a single VPlan-level instruction.

@ VScale

Returns the value for vscale.

VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.

VPBasicBlock * getParent()

Helper class to create VPRecipes from IR instructions.

A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...

This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...

VPWidenCastRecipe is a recipe to create vector cast instructions.

VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...

VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)

Return a VPValue wrapping a ConstantInt with the given type and value.

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

This is an optimization pass for GlobalISel generic memory operations.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

cl::opt< unsigned > ForceTargetInstructionCost

DWARFExpression::Operation Op

cl::opt< bool > EnableVPlanNativePath

std::unique_ptr< VPlan > VPlanPtr

ElementCount FixedVF

Definition LoopVectorizationPlanner.h:415

ElementCount ScalableVF

Definition LoopVectorizationPlanner.h:416

FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)

Definition LoopVectorizationPlanner.h:424

FixedScalableVFPair(const ElementCount &Max)

Definition LoopVectorizationPlanner.h:421

bool hasVector() const

Definition LoopVectorizationPlanner.h:437

static FixedScalableVFPair getNone()

Definition LoopVectorizationPlanner.h:431

FixedScalableVFPair()

Definition LoopVectorizationPlanner.h:418

A range of powers-of-2 vectorization factors with fixed start and adjustable end.

Struct to hold various analysis needed for cost computations.

TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.

Definition LoopVectorizationPlanner.h:377

InstructionCost Cost

Cost of the loop with that width.

Definition LoopVectorizationPlanner.h:382

ElementCount MinProfitableTripCount

The minimum trip count required to make vectorization profitable, e.g.

Definition LoopVectorizationPlanner.h:389

bool operator==(const VectorizationFactor &rhs) const

Definition LoopVectorizationPlanner.h:400

ElementCount Width

Vector width with best cost.

Definition LoopVectorizationPlanner.h:379

InstructionCost ScalarCost

Cost of the scalar loop.

Definition LoopVectorizationPlanner.h:385

bool operator!=(const VectorizationFactor &rhs) const

Definition LoopVectorizationPlanner.h:404

static VectorizationFactor Disabled()

Width 1 means no vectorization, cost 0 means uncomputed cost.

Definition LoopVectorizationPlanner.h:396

VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)

Definition LoopVectorizationPlanner.h:391