LLVM: lib/Transforms/Vectorize/VPlanAnalysis.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

21

22using namespace llvm;

24

25#define DEBUG_TYPE "vplan"

26

29 if (const auto *CanIV = dyn_cast(

30 &LoopRegion->getEntryBasicBlock()->front())) {

31 CanonicalIVTy = CanIV->getScalarType();

32 return;

33 }

34 }

35

36

37

38 auto *TC = Plan.getTripCount();

39 if (TC->isLiveIn()) {

40 CanonicalIVTy = TC->getLiveInIRValue()->getType();

41 return;

42 }

44}

45

46Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {

48 for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {

49 VPValue *Inc = R->getIncomingValue(I);

51 "different types inferred for different incoming values");

52 CachedTypes[Inc] = ResTy;

53 }

54 return ResTy;

55}

56

57Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {

58

59

60 auto SetResultTyFromOp = [this, R]() {

62 for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {

63 VPValue *OtherV = R->getOperand(Op);

65 "different types inferred for different operands");

66 CachedTypes[OtherV] = ResTy;

67 }

68 return ResTy;

69 };

70

71 unsigned Opcode = R->getOpcode();

73 return SetResultTyFromOp();

74

75 switch (Opcode) {

76 case Instruction::ExtractElement:

77 case Instruction::Freeze:

81 case Instruction::Select: {

83 VPValue *OtherV = R->getOperand(2);

85 "different types inferred for different operands");

86 CachedTypes[OtherV] = ResTy;

87 return ResTy;

88 }

89 case Instruction::ICmp:

90 case Instruction::FCmp:

94 "different types inferred for different operands");

101 }

104 case Instruction::PHI:

105

106

116 return SetResultTyFromOp();

126 return VecTy->getElementType();

127 return BaseTy;

128 }

134 "LogicalAnd operands should be bool");

139

144 default:

145 break;

146 }

147

149 dbgs() << "LV: Found unhandled opcode for: ";

150 R->getVPSingleValue()->dump();

151 });

153}

154

155Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {

156 unsigned Opcode = R->getOpcode();

161 "types for both operands must match for binary op");

162 CachedTypes[R->getOperand(1)] = ResTy;

163 return ResTy;

164 }

165

166 switch (Opcode) {

167 case Instruction::ICmp:

168 case Instruction::FCmp:

170 case Instruction::FNeg:

171 case Instruction::Freeze:

173 case Instruction::ExtractValue: {

174 assert(R->getNumOperands() == 2 && "expected single level extractvalue");

177 return StructTy->getTypeAtIndex(CI->getZExtValue());

178 }

179 default:

180 break;

181 }

182

183

185 dbgs() << "LV: Found unhandled opcode for: ";

186 R->getVPSingleValue()->dump();

187 });

189}

190

193 return CI.getType();

194}

195

198 "Store recipes should not define any values");

200}

201

204 VPValue *OtherV = R->getOperand(2);

206 "different types inferred for different operands");

207 CachedTypes[OtherV] = ResTy;

208 return ResTy;

209}

210

212 unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

213

218 "inferred types for operands of binary op don't match");

219 CachedTypes[R->getOperand(1)] = ResTy;

220 return ResTy;

221 }

222

224 return R->getUnderlyingInstr()->getType();

225

226 switch (Opcode) {

227 case Instruction::Call: {

228 unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);

229 return cast(R->getOperand(CallIdx)->getLiveInIRValue())

230 ->getReturnType();

231 }

232 case Instruction::Select: {

235 "inferred types for operands of select op don't match");

236 CachedTypes[R->getOperand(2)] = ResTy;

237 return ResTy;

238 }

239 case Instruction::ICmp:

240 case Instruction::FCmp:

242 case Instruction::Alloca:

243 case Instruction::ExtractValue:

244 return R->getUnderlyingInstr()->getType();

245 case Instruction::Freeze:

246 case Instruction::FNeg:

247 case Instruction::GetElementPtr:

249 case Instruction::Load:

250 return cast(R->getUnderlyingInstr())->getType();

251 case Instruction::Store:

252

253

254

256 default:

257 break;

258 }

259

261 dbgs() << "LV: Found unhandled opcode for: ";

262 R->getVPSingleValue()->dump();

263 });

265}

266

268 if (Type *CachedTy = CachedTypes.lookup(V))

269 return CachedTy;

270

271 if (V->isLiveIn()) {

272 if (auto *IRValue = V->getLiveInIRValue())

273 return IRValue->getType();

274

275

276 return CanonicalIVTy;

277 }

278

279 Type *ResultTy =

284 [this](const auto *R) {

285

286

287

288

290 })

291 .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(

292 [](const auto *R) { return R->getScalarType(); })

298 })

299

302 [](const auto *R) { return R->getResultType(); })

305 [this](const auto *R) { return inferScalarTypeForRecipe(R); })

306 .Case([V](const auto *R) {

307

308 return V->getUnderlyingValue()->getType();

309 })

311 return R->getSCEV()->getType();

312 })

313 .Case([this](const auto *R) {

315 })

316 .Case([this](const auto *R) {

318 });

319

320 assert(ResultTy && "could not infer type for the given VPValue");

321 CachedTypes[V] = ResultTy;

322 return ResultTy;

323}

324

327

334 continue;

336 EphRecipes.insert(RepR);

337 }

338 }

339

340

341

342

343 while (!Worklist.empty()) {

346 auto *OpR = Op->getDefiningRecipe();

347 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))

348 continue;

350 auto *UR = dyn_cast(U);

351 return !UR || !EphRecipes.contains(UR);

352 }))

353 continue;

354 EphRecipes.insert(OpR);

356 }

357 }

358}

359

362

365 if (A == B)

366 return false;

367

369 for (auto &R : *A->getParent()) {

370 if (&R == A)

371 return true;

372 if (&R == B)

373 return false;

374 }

376 };

379 if (ParentA == ParentB)

380 return LocalComesBefore(A, B);

381

382#ifndef NDEBUG

387 Region->getNumPredecessors() == 1 && "Expected SESE region!");

388 assert(R->getParent()->size() == 1 &&

389 "A recipe in an original replicator region must be the only "

390 "recipe in its block");

392 }

393 return nullptr;

394 };

396 "No replicate regions expected at this point");

398 "No replicate regions expected at this point");

399#endif

401}

402

404 unsigned OverrideMaxNumRegs) const {

406 return LU.second > (OverrideMaxNumRegs > 0

407 ? OverrideMaxNumRegs

408 : TTI.getNumberOfRegisters(LU.first));

409 });

410}

411

415

416

417

419

420

422

424

426

427

428

431

432

433

434

435

436

439 LoopRegion);

441 if (!VPBB->getParent())

442 break;

445

446

447 for (VPValue *U : R.operands()) {

448 auto *DefR = U->getDefiningRecipe();

449

450

451

452

453

454 if (!DefR && (!U->getLiveInIRValue() ||

456 continue;

457

458

459 if (!DefR) {

460 LoopInvariants.insert(U);

461 continue;

462 }

463

464

465 EndPoint[U] = Idx2Recipe.size();

467 }

468 }

469 if (VPBB == LoopRegion->getExiting()) {

470

471

474 EndPoint[WideIV] = Idx2Recipe.size();

476 }

477 }

478 }

479 }

480

481

484

485

486

487 for (auto &Interval : EndPoint)

489

493

494 LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

495

497

498 const auto &TTICapture = TTI;

499 auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {

501 (VF.isScalable() &&

502 !TTICapture.isElementTypeLegalForScalableVector(Ty)))

503 return 0;

504 return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));

505 };

506

507

508

509

510

511 for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {

513

514

515 VPValueList &List = TransposeEnds[Idx];

518

519

520

521 if (none_of(R->definedValues(),

522 [&Ends](VPValue *Def) { return Ends.count(Def); }) &&

523 !R->mayHaveSideEffects())

524 continue;

525

526

527

528

532 continue;

533

534

535 for (unsigned J = 0, E = VFs.size(); J < E; ++J) {

536

537

538

539

540

542

543 for (auto *VPV : OpenIntervals) {

544

545

546

547

548

552 continue;

553

554 if (VFs[J].isScalar() ||

560 unsigned ClassID =

562

563

565 } else {

566

567

568 unsigned ScaleFactor =

571 if (ScaleFactor > 1) {

572 VF = VFs[J].divideCoefficientBy(ScaleFactor);

573 LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]

574 << " to " << VF << " for " << *R << "\n";);

575 }

576

578 unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);

579 RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);

580 }

581 }

582

583 for (const auto &Pair : RegUsage) {

584 auto &Entry = MaxUsages[J][Pair.first];

585 Entry = std::max(Entry, Pair.second);

586 }

587 }

588

589 LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "

590 << OpenIntervals.size() << '\n');

591

592

593

594 for (VPValue *DefV : R->definedValues())

596 OpenIntervals.insert(DefV);

597 }

598

599

600

601

602

604 for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {

605

606

607

609

610 for (auto *In : LoopInvariants) {

611

612

614

616 unsigned ClassID = TTI.getRegisterClassForType(

618 Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);

619 }

620

622 dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';

623 dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()

624 << " item\n";

625 for (const auto &pair : MaxUsages[Idx]) {

626 dbgs() << "LV(REG): RegisterClass: "

627 << TTI.getRegisterClassName(pair.first) << ", " << pair.second

628 << " registers\n";

629 }

630 dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()

631 << " item\n";

632 for (const auto &pair : Invariant) {

633 dbgs() << "LV(REG): RegisterClass: "

634 << TTI.getRegisterClassName(pair.first) << ", " << pair.second

635 << " registers\n";

636 }

637 });

638

641 RUs[Idx] = RU;

642 }

643

644 return RUs;

645}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

ReachingDefInfo InstSet & ToRemove

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

std::pair< uint64_t, uint64_t > Interval

This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

This pass exposes codegen information to IR-level passes.

This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...

This file implements dominator tree analysis for a single level of a VPlan's H-CFG.

This file contains the declarations of different VPlan-related auxiliary helpers.

This file contains the declarations of the Vectorization Plan base classes:

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

Implements a dense probed hash-table based set.

Core dominator tree base class.

bool properlyDominates(const DomTreeNodeBase< VPBlockBase > *A, const DomTreeNodeBase< VPBlockBase > *B) const

constexpr bool isVector() const

One or more elements.

static constexpr ElementCount getFixed(ScalarTy MinVal)

bool isBitwiseLogicOp() const

Return true if this is and/or/xor.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

bool erase(PtrType Ptr)

Remove pointer from the set.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...

TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)

Add a case on the given type.

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI Type * getVoidTy(LLVMContext &C)

bool isIntegerTy() const

True if this is an instance of IntegerType.

static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)

A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...

VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.

iterator_range< iterator > phis()

Returns an iterator range over the PHI-like recipes in the block.

A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.

VPBlockBase is the building block of the Hierarchical Control-Flow Graph.

const VPBasicBlock * getEntryBasicBlock() const

static auto blocksOnly(const T &Range)

Return an iterator range over Range which only includes BlockTy blocks.

A recipe for generating conditional branches on the bits of a mask.

Canonical scalar induction phi of the vector loop.

A recipe for converting the input value IV value to the corresponding value of an IV with different s...

bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)

Definition VPlanAnalysis.cpp:363

A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...

Recipe to expand a SCEV expression.

A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...

This is a concrete Recipe that models a single VPlan-level instruction.

@ ExtractLane

Extracts a single lane (first operand) from a set of vector operands.

@ ComputeAnyOfResult

Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...

@ ExtractPenultimateElement

@ ResumeForEpilogue

Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...

@ Unpack

Extracts all lanes from its (non-scalable) vector operand.

@ FirstOrderRecurrenceSplice

@ ReductionStartVector

Start vector for reductions with 3 operands: the original start value, the identity value for the red...

@ BuildVector

Creates a fixed-width vector containing all operands.

@ BuildStructVector

Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...

@ CanonicalIVIncrementForPart

@ CalculateTripCountMinusVF

VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...

VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.

A recipe for handling reduction phis.

A recipe to represent inloop, ordered or partial reduction operations.

VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...

const VPBlockBase * getEntry() const

const VPBlockBase * getExiting() const

VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...

A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...

An analysis for type-inference for VPValues.

LLVMContext & getContext()

Return the LLVMContext used by the analysis.

Type * inferScalarType(const VPValue *V)

Infer the type of V. Returns the scalar type of V.

Definition VPlanAnalysis.cpp:267

VPTypeAnalysis(const VPlan &Plan)

Definition VPlanAnalysis.cpp:27

This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...

This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...

A recipe to compute a pointer to the last element of each part of a widened memory access for widened...

A recipe to compute the pointers for widened memory accesses of IndexTy.

A recipe for widening Call instructions using library calls.

A Recipe for widening the canonical induction variable of the vector loop.

VPWidenCastRecipe is a recipe to create vector cast instructions.

A recipe for handling GEP instructions.

A recipe for widening vector intrinsics.

A common base class for widening memory operations.

A recipe for widened phis.

VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...

VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...

VPValue & getVectorTripCount()

The vector trip count.

LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()

Returns the VPRegionBlock of the vector loop.

static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)

This static method is the primary way to construct an VectorType.

static LLVM_ABI bool isValidElementType(Type *ElemTy)

Return true if the specified type is valid as a element type.

std::pair< iterator, bool > insert(const ValueT &V)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

void Calculate(DomTreeT &DT)

bool match(Val *V, const Pattern &P)

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_IntrinsicIntrinsic::fabs(m_Value(X))

VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)

class_match< VPValue > m_VPValue()

Match an arbitrary VPValue and ignore it.

bool onlyScalarValuesUsed(const VPValue *Def)

Returns true if only scalar values of Def are used by all users.

unsigned getVFScaleFactor(VPRecipeBase *R)

Get the VF scaling factor applied to the recipe's output, if the recipe has one.

This is an optimization pass for GlobalISel generic memory operations.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...

SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)

Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...

Definition VPlanAnalysis.cpp:412

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

void collectEphemeralRecipesForVPlan(VPlan &Plan, DenseSet< VPRecipeBase * > &EphRecipes)

Definition VPlanAnalysis.cpp:325

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

DWARFExpression::Operation Op

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

A MapVector that performs no allocations if smaller than a certain size.

A recipe for handling first-order recurrence phis.

A struct that represents some properties of the register usage of a loop.

SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers

Holds the maximum number of concurrent live intervals in the loop.

bool exceedsMaxNumRegs(const TargetTransformInfo &TTI, unsigned OverrideMaxNumRegs=0) const

Check if any of the tracked live intervals exceeds the number of available registers for the target.

Definition VPlanAnalysis.cpp:403

SmallMapVector< unsigned, unsigned, 4 > LoopInvariantRegs

Holds the number of loop invariant values that are used in the loop.

A recipe for widening select instructions.