LLVM: lib/Transforms/Vectorize/VPlanUnroll.cpp Source File

//===-- VPlanUnroll.cpp - VPlan unrolling ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements explicit unrolling of VPlans: unrollByUF unrolls a
/// plan by its unroll factor UF, and replicateByVF replaces replicating
/// recipes outside of replicate regions with VF single-scalar clones, one per
/// lane.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;

namespace {

/// Helper to hold state needed for unrolling. It holds the plan to unroll by
/// UF. It also keeps, for every VPValue of part 0, the corresponding values
/// of parts 1..UF-1 created while unrolling.
class UnrollState {
  /// Plan to unroll.
  VPlan &Plan;
  /// Unroll factor to unroll by.
  const unsigned UF;
  /// Analysis for types.
  VPTypeAnalysis TypeInfo;

  /// Unrolling may create recipes that should not be unrolled themselves
  /// (e.g. the widened step of an induction). Those are tracked in ToSkip.
  SmallPtrSet<VPRecipeBase *, 8> ToSkip;

  // Associate with each VPValue of part 0 its unrolled instances of parts 1,
  // ..., UF-1.
  DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;

  /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
  void unrollReplicateRegionByUF(VPRegionBlock *VPR);

  /// Unroll recipe \p R by cloning it UF - 1 times, unless a single instance
  /// suffices for all parts.
  void unrollRecipeByUF(VPRecipeBase &R);

  /// Unroll header phi recipe \p R. How the recipe gets unrolled depends on
  /// the concrete kind of header phi. Inserts newly created recipes at \p
  /// InsertPtForPhi.
  void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                           VPBasicBlock::iterator InsertPtForPhi);

  /// Unroll a widen induction recipe \p IV, adding recipes that compute the
  /// induction value for each part.
  void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
                                VPBasicBlock::iterator InsertPtForPhi);

  /// Return a constant of the canonical IV type wrapping \p Part.
  VPValue *getConstantVPV(unsigned Part) {
    Type *CanIVIntTy = Plan.getVectorLoopRegion()->getCanonicalIVType();
    return Plan.getConstantInt(CanIVIntTy, Part);
  }

public:
  UnrollState(VPlan &Plan, unsigned UF) : Plan(Plan), UF(UF), TypeInfo(Plan) {}

  void unrollBlock(VPBlockBase *VPB);

  /// Return the VPValue to use for \p V in part \p Part. Part 0 and live-ins
  /// are used directly.
  VPValue *getValueForPart(VPValue *V, unsigned Part) {
    if (Part == 0 || V->isLiveIn())
      return V;
    assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
           "accessed value does not exist");
    return VPV2Parts[V][Part - 1];
  }

  /// Given an original recipe \p OrigR (part 0) and its copy \p CopyR for
  /// part \p Part, map every VPValue defined by \p OrigR to the corresponding
  /// VPValue defined by \p CopyR.
  void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
                        unsigned Part) {
    for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
      const auto &[V, _] = VPV2Parts.try_emplace(VPV);
      assert(V->second.size() == Part - 1 && "earlier parts not set");
      V->second.push_back(CopyR->getVPValue(Idx));
    }
  }

  /// Given a uniform recipe \p R, add it for all parts.
  void addUniformForAllParts(VPSingleDefRecipe *R) {
    const auto &[V, Inserted] = VPV2Parts.try_emplace(R);
    assert(Inserted && "uniform value already added");
    for (unsigned Part = 0; Part != UF; ++Part)
      V->second.push_back(R);
  }

  bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }

  /// Update \p R's operand at \p OpIdx with its corresponding VPValue for
  /// part \p Part.
  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
    auto *Op = R->getOperand(OpIdx);
    R->setOperand(OpIdx, getValueForPart(Op, Part));
  }

  /// Update all of \p R's operands with their corresponding VPValues for part
  /// \p Part.
  void remapOperands(VPRecipeBase *R, unsigned Part) {
    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
      R->setOperand(OpIdx, getValueForPart(Op, Part));
  }
};
} // namespace
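The VPV2Parts map above is the backbone of the transformation: the original recipe acts as part 0, and the clone for part P (P >= 1) is stored at index P - 1. The following standalone sketch (plain C++ containers and strings instead of VPlan types; PartMap and its members are hypothetical names, not part of LLVM) illustrates just that indexing invariant, mirroring getValueForPart and addRecipeForPart.

// Conceptual sketch only: std::string stands in for VPValue *, and the map
// mirrors VPV2Parts' invariant that index Part - 1 holds the copy for Part.
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

struct PartMap {
  std::unordered_map<std::string, std::vector<std::string>> Val2Parts;

  // Analogous to UnrollState::addRecipeForPart for a single defined value.
  void addForPart(const std::string &Orig, const std::string &Copy,
                  unsigned Part) {
    auto &Parts = Val2Parts[Orig];
    assert(Parts.size() == Part - 1 && "earlier parts not set");
    Parts.push_back(Copy);
  }

  // Analogous to UnrollState::getValueForPart: part 0 is the original value.
  const std::string &getForPart(const std::string &Orig, unsigned Part) const {
    if (Part == 0)
      return Orig;
    return Val2Parts.at(Orig)[Part - 1];
  }
};

int main() {
  PartMap M;
  M.addForPart("%add", "%add.1", 1);
  M.addForPart("%add", "%add.2", 2);
  assert(M.getForPart("%add", 0) == "%add");
  assert(M.getForPart("%add", 2) == "%add.2");
  return 0;
}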

void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
  for (unsigned Part = 1; Part != UF; ++Part) {
    // Clone the whole replicate region once per part and insert the copy
    // right after the previous one.
    auto *Copy = VPR->clone();
    VPBlockUtils::insertBlockBefore(Copy, InsertPt);

    // Walk the original and cloned blocks in lock-step, remapping operands
    // and recording the per-part values of each cloned recipe.
    auto PartIBlocks = vp_depth_first_shallow(Copy->getEntry());
    auto Part0Blocks = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[PartIVPBB, Part0VPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartIBlocks),
             VPBlockUtils::blocksOnly<VPBasicBlock>(Part0Blocks))) {
      for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
        remapOperands(&PartIR, Part);
        if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
          ScalarIVSteps->addOperand(getConstantVPV(Part));

        addRecipeForPart(&Part0R, &PartIR, Part);
      }
    }
  }
}

void UnrollState::unrollWidenInductionByUF(
    VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
  VPBasicBlock *PH = cast<VPBasicBlock>(
      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
  Type *IVTy = TypeInfo.inferScalarType(IV);
  auto &ID = IV->getInductionDescriptor();
  VPIRFlags Flags;
  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
    Flags = ID.getInductionBinOp()->getFastMathFlags();

  VPValue *ScalarStep = IV->getStepValue();
  VPBuilder Builder(PH);
  Type *VectorStepTy =
      IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
  VPInstruction *VectorStep = Builder.createNaryOp(
      VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
      Flags, IV->getDebugLoc());

  ToSkip.insert(VectorStep);

  // Create recipes to compute the induction values of parts 1 .. UF-1. Part 0
  // remains the header phi; each later part adds the widened step to the
  // previous part:
  //   %Part.0 = WIDEN-INDUCTION %Start, %ScalarStep, %VectorStep, %Part.last
  //   %Part.1 = %Part.0 + %VectorStep   ; "step.add"
  //   %Part.2 = %Part.1 + %VectorStep   ; "step.add.2"
  //   ...
  // The newly created recipes are added to ToSkip so they are not unrolled
  // again.
  VPValue *Prev = IV;
  Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
  unsigned AddOpc;
  // Floating-point inductions keep their original induction opcode; integer
  // and pointer inductions use a plain add of the widened step.
  if (IVTy->isFloatingPointTy())
    AddOpc = ID.getInductionOpcode();
  else
    AddOpc = Instruction::Add;
  for (unsigned Part = 1; Part != UF; ++Part) {
    std::string Name =
        Part > 1 ? "step.add." + std::to_string(Part) : "step.add";

    VPInstruction *Add = Builder.createNaryOp(AddOpc,
                                              {
                                                  Prev,
                                                  VectorStep,
                                              },
                                              Flags, IV->getDebugLoc(), Name);
    ToSkip.insert(Add);
    addRecipeForPart(IV, Add, Part);
    Prev = Add;
  }
  // The phi itself gets two extra operands: the widened step and the last
  // part, used to compute the backedge value.
  IV->addOperand(VectorStep);
  IV->addOperand(Prev);
}
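As a sanity check on the step.add chain built above: for an induction with start S and step Step, vectorized by VF and unrolled by UF, part P starts at S + P * VF * Step, i.e. each part adds the widened step VF * Step to the previous one. A standalone sketch of that arithmetic (partStart is a hypothetical helper; plain integers stand in for vector lanes):

// Standalone sketch of the per-part induction arithmetic; partStart(P) is the
// value the P-th "step.add" materializes relative to the header phi (part 0).
#include <cassert>
#include <cstdint>

static int64_t partStart(int64_t Start, int64_t Step, unsigned VF,
                         unsigned Part) {
  // Equivalent to chaining Part additions of the widened step VF * Step.
  return Start + static_cast<int64_t>(Part) * VF * Step;
}

int main() {
  // VF = 8, step 3, start 0: part 2 covers lanes 16..23 and starts at 48.
  assert(partStart(/*Start=*/0, /*Step=*/3, /*VF=*/8, /*Part=*/2) == 48);
  // The chained form used above: previous part plus VF * Step per part.
  int64_t Prev = 0;
  for (unsigned Part = 1; Part != 4; ++Part) {
    Prev += 8 * 3;
    assert(Prev == partStart(0, 3, 8, Part));
  }
  return 0;
}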

void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                                      VPBasicBlock::iterator InsertPtForPhi) {
  // First-order recurrence phis pass a single value across the backedge
  // irrespective of unrolling; the per-part values are handled when unrolling
  // their splice users.
  if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
    return;

  // Widened inductions get a chain of step adds instead of plain clones.
  if (auto *IV = dyn_cast<VPWidenInductionRecipe>(R)) {
    unrollWidenInductionByUF(IV, InsertPtForPhi);
    return;
  }

  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
  if (RdxPhi && RdxPhi->isOrdered())
    return;

  auto InsertPt = std::next(R->getIterator());
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R->clone();
    Copy->insertBefore(*R->getParent(), InsertPt);
    addRecipeForPart(R, Copy, Part);
    if (RdxPhi) {
      // Reduction phis for parts other than 0 must start from the reduction's
      // identity value: either the second operand of the ReductionStartVector
      // directly, or a new ReductionStartVector built from the identity value
      // when the scale factor is not 1.
      if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
        assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
               "unexpected start VPInstruction");
        if (Part != 1)
          continue;
        VPValue *StartV;
        if (match(VPI->getOperand(2), m_One())) {
          StartV = VPI->getOperand(1);
        } else {
          auto *C = VPI->clone();
          C->setOperand(0, C->getOperand(1));
          C->insertAfter(VPI);
          StartV = C;
        }
        for (unsigned Part = 1; Part != UF; ++Part)
          VPV2Parts[VPI][Part - 1] = StartV;
      }
      Copy->addOperand(getConstantVPV(Part));
    } else {
      assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
             "unexpected header phi recipe not needing unrolled part");
    }
  }
}

/// Handle recipes other than header phis: clone them UF - 1 times unless a
/// single instance suffices for all parts.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
  if (match(&R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount())))
    return;

  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
    if (vputils::onlyFirstPartUsed(VPI)) {
      addUniformForAllParts(VPI);
      return;
    }
  }
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
        RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
      // Stores to an invariant address only need to store the value of the
      // last part.
      remapOperands(&R, UF - 1);
      return;
    }
    // noalias.scope.decl annotations only need to be emitted once, not per
    // part.
    if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue());
        II && II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
      addUniformForAllParts(RepR);
      return;
    }
  }

  // Clone the recipe for parts 1 .. UF-1 and remap the operands of each clone.
  auto InsertPt = std::next(R.getIterator());
  VPBasicBlock &VPBB = *R.getParent();
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R.clone();
    Copy->insertBefore(VPBB, InsertPt);
    addRecipeForPart(&R, Copy, Part);

    VPValue *Op;
    if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                      m_VPValue(), m_VPValue(Op)))) {
      // The splice for part P combines the recurrence values of parts P-1 and
      // P.
      Copy->setOperand(0, getValueForPart(Op, Part - 1));
      Copy->setOperand(1, getValueForPart(Op, Part));
      continue;
    }
    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
      auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
      if (Phi && Phi->isOrdered()) {
        // Ordered reductions are chained across parts through the phi.
        auto &Parts = VPV2Parts[Phi];
        if (Part == 1) {
          Parts.clear();
          Parts.push_back(Red);
        }
        Parts.push_back(Copy->getVPSingleValue());
        Phi->setOperand(1, Copy->getVPSingleValue());
      }
    }
    remapOperands(Copy, Part);

    // Add an operand indicating the part to generate code for, to recipes
    // still requiring it.
    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
            VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
        match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                        m_VPValue())))
      Copy->addOperand(getConstantVPV(Part));

    // Vector (end) pointers keep the part-0 pointer operand and offset by the
    // part operand added above.
    if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
      Copy->setOperand(0, R.getOperand(0));
  }
}

void UnrollState::unrollBlock(VPBlockBase *VPB) {
  auto *VPR = dyn_cast<VPRegionBlock>(VPB);
  if (VPR) {
    if (VPR->isReplicator())
      return unrollReplicateRegionByUF(VPR);

    // Traverse the blocks of the region in RPO, so defs are unrolled before
    // their uses across blocks.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(VPR->getEntry());
    for (VPBlockBase *VPB : RPOT)
      unrollBlock(VPB);
    return;
  }

  // VPB is a VPBasicBlock; unroll its recipes in place.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
      continue;

    // Recipes that combine all parts into a single result (AnyOf,
    // First/LastActiveLane and the Compute*Result reductions) are not cloned;
    // instead, the values of parts 1 .. UF-1 are appended as extra operands.
    VPValue *Op1;
    if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
        match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
        match(&R, m_LastActiveLane(m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                      m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    VPValue *Op0;
    if (match(&R, m_ExtractLane(m_VPValue(Op0), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }

    // Extracting the last or penultimate element needs only the last or
    // penultimate part: with a scalar VF the extract folds to that part
    // directly, otherwise its operand is remapped to the last part.
    if (match(&R, m_CombineOr(m_ExtractLastLaneOfLastPart(m_VPValue(Op0)),
                              m_ExtractPenultimateElement(m_VPValue(Op0))))) {
      if (Plan.hasScalarVFOnly()) {
        auto *I = cast<VPInstruction>(&R);
        // Extracting from the end with VF = 1 means retrieving the last or
        // penultimate unrolled part.
        bool IsPenultimatePart =
            match(&R, m_ExtractPenultimateElement(m_VPValue()));
        unsigned PartIdx = IsPenultimatePart ? UF - 2 : UF - 1;

        I->replaceAllUsesWith(getValueForPart(Op0, PartIdx));
        continue;
      }

      R.setOperand(0, getValueForPart(Op0, UF - 1));
      continue;
    }

    // Values that are uniform across VF and UF need a single instance shared
    // by all parts.
    auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
    if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
      unrollHeaderPHIByUF(H, InsertPtForPhi);
      continue;
    }

    unrollRecipeByUF(R);
  }
}

void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);
  auto Cleanup = make_scope_exit([&Plan]() {
    // CanonicalIVIncrementForPart for part 0 is a no-op: replace it with its
    // operand and erase it. This also runs when UF == 1.
    auto Iter = vp_depth_first_deep(Plan.getEntry());
    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *VPI = dyn_cast<VPInstruction>(&R);
        if (VPI &&
            VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
            VPI->getNumOperands() == 1) {
          VPI->replaceAllUsesWith(VPI->getOperand(0));
          VPI->eraseFromParent();
        }
      }
    }
  });
  if (UF == 1) {
    return;
  }

  UnrollState Unroller(Plan, UF);

  // Unroll all blocks in the plan, starting at the entry and including the
  // blocks outside the vector loop region (preheader, middle block, ...).
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());
  for (VPBlockBase *VPB : RPOT)
    Unroller.unrollBlock(VPB);

  unsigned Part = 1;
  // Remap operands of cloned header phis to update their backedge values. The
  // clones are inserted right after their part-0 phi, so whenever a part-0
  // phi (one recorded in the unroller) is reached, reset Part to 1 and remap
  // the following clones with increasing parts.
  for (VPRecipeBase &H :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // The second operand of fixed-order recurrence phis, feeding the spliced
    // value across the backedge, needs to map to the last part of the spliced
    // value.
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
      Unroller.remapOperand(&H, 1, UF - 1);
      continue;
    }
    if (Unroller.contains(H.getVPSingleValue())) {
      Part = 1;
      continue;
    }
    Unroller.remapOperands(&H, Part);
    Part++;
  }

  VPlanTransforms::removeDeadRecipes(Plan);
}
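For context, unrollByUF is a plan-to-plan transform. A hypothetical driver along the following lines (not part of this file; unrollPlanSketch and the surrounding planner logic are invented, only the two VPlanTransforms entry points used in this file are assumed) would invoke it once the unroll factor has been chosen:

// Hypothetical driver sketch, assuming only the two transforms referenced in
// this file; the wrapper function and its call site are invented for
// illustration.
#include "VPlan.h"
#include "VPlanTransforms.h"

static void unrollPlanSketch(llvm::VPlan &Plan, unsigned BestUF) {
  // Expand every recipe into per-part copies; the scope_exit cleanup above
  // also runs for BestUF == 1.
  llvm::VPlanTransforms::unrollByUF(Plan, BestUF);
  // Unrolling can leave unused recipes behind (e.g. replaced extracts).
  llvm::VPlanTransforms::removeDeadRecipes(Plan);
}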

/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
/// definitions of operands that have already been replicated.
static VPValue *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
             VPSingleDefRecipe *DefR, VPLane Lane,
             const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
  // If DefR is an Unpack of a vector Op, the clone for Lane is simply the
  // scalar definition of Op for that lane: reuse an existing per-lane
  // definition or extract the lane.
  VPValue *Op;
  if (match(DefR, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)))) {
    auto LaneDefs = Def2LaneDefs.find(Op);
    if (LaneDefs != Def2LaneDefs.end())
      return LaneDefs->second[Lane.getKnownLane()];

    VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
    return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
  }

  // Collect the operands at Lane, creating extracts as needed.
  SmallVector<VPValue *> NewOps;
  for (VPValue *Op : DefR->operands()) {
    // If Op has already been replicated, use its scalar definition for Lane
    // directly.
    auto LaneDefs = Def2LaneDefs.find(Op);
    if (LaneDefs != Def2LaneDefs.end()) {
      NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
      continue;
    }
    if (Lane.getKind() == VPLane::Kind::ScalableLast) {
      // The last lane of a scalable vector has no compile-time known index;
      // look through the mandatory Unpack and extract the last lane of the
      // last part instead.
      [[maybe_unused]] bool Matched =
          match(Op, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)));
      assert(Matched && "original op must have been Unpack");
      auto *ExtractPart =
          Builder.createNaryOp(VPInstruction::ExtractLastPart, {Op});
      NewOps.push_back(
          Builder.createNaryOp(VPInstruction::ExtractLastLane, {ExtractPart}));
      continue;
    }
    // Look through BuildVector to avoid creating a redundant extract.
    if (match(Op, m_BuildVector())) {
      NewOps.push_back(
          cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
      continue;
    }

    // Single scalars can be used directly for all lanes.
    if (vputils::isSingleScalar(Op)) {
      NewOps.push_back(Op);
      continue;
    }
    VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
    VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
    NewOps.push_back(Ext);
  }

  VPSingleDefRecipe *New;
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
    // Create a single-scalar copy of the replicate recipe using the per-lane
    // operands, preserving its metadata and flags.
    New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
                                /*IsSingleScalar=*/true, /*Mask=*/nullptr,
                                *RepR, *RepR, RepR->getDebugLoc());
  } else {
    assert(isa<VPInstruction>(DefR) &&
           "DefR must be a VPReplicateRecipe or VPInstruction");
    New = DefR->clone();
    for (const auto &[Idx, Op] : enumerate(NewOps)) {
      New->setOperand(Idx, Op);
    }
  }
  New->insertBefore(DefR);
  return New;
}

/// Replace each replicating VPReplicateRecipe and VPInstruction outside of any
/// replicate region in \p Plan with \p VF single-scalar recipes, one per lane.
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
  Type *IdxTy = IntegerType::get(
      Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);

  // Visit the basic blocks outside the vector loop region as well as the ones
  // directly inside it; recipes inside replicate regions are left alone.
  auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getEntry()));
  auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
  auto VPBBsToUnroll =
      concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);

  // Per-lane definitions created so far, so later recipes can reuse them, and
  // the original recipes to remove once all uses have been rewritten.
  DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
  SmallVector<VPRecipeBase *> ToRemove;
  for (VPBasicBlock *VPBB : VPBBsToUnroll) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      // Skip recipes that don't need replicating: everything except
      // replicating VPReplicateRecipes and VPInstructions producing their
      // result per lane.
      auto *DefR = dyn_cast<VPSingleDefRecipe>(&R);
      if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR) ||
          vputils::isSingleScalar(DefR))
        continue;

      VPBuilder Builder(DefR);
      if (DefR->getNumUsers() == 0) {
        // No users (e.g. a replicated store): emit the per-lane clones for
        // their side effects and drop the original recipe.
        for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
          cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
        DefR->eraseFromParent();
        continue;
      }

      // Create single-scalar clones for all lanes and remember them, so
      // operands of later recipes can use them directly.
      SmallVector<VPValue *> LaneDefs;
      for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
        LaneDefs.push_back(
            cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));

      Def2LaneDefs[DefR] = LaneDefs;

      // Users that only demand the first lane can use the clone for lane 0.
      DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
        return U.usesFirstLaneOnly(DefR);
      });

      // Build(Struct)Vector users pack the lanes back into a vector; give
      // them all lane definitions as operands.
      for (VPUser *U : to_vector(DefR->users())) {
        auto *VPI = dyn_cast<VPInstruction>(U);
        if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
                     VPI->getOpcode() != VPInstruction::BuildStructVector))
          continue;
        assert(VPI->getNumOperands() == 1 &&
               "Build(Struct)Vector must have a single operand before "
               "replicating by VF");
        VPI->setOperand(0, LaneDefs[0]);
        for (VPValue *LaneDef : drop_begin(LaneDefs))
          VPI->addOperand(LaneDef);
      }
      ToRemove.push_back(DefR);
    }
  }
  for (VPRecipeBase *R : ToRemove)
    R->eraseFromParent();
}
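The lane-by-lane scalarization above can be summarized with a standalone analogy (plain C++, std::vector standing in for VPlan vector values; replicateByLanes is a hypothetical name): each replicating definition becomes VF single-lane clones, lane L reads element L of each operand, and a BuildVector-style pack collects the VF scalar results.

// Conceptual sketch of replicate-by-VF, not VPlan code: scalarize a binary op
// over two "vector" operands by cloning it once per lane, then pack results.
#include <cassert>
#include <functional>
#include <vector>

static std::vector<int>
replicateByLanes(const std::vector<int> &A, const std::vector<int> &B,
                 const std::function<int(int, int)> &ScalarOp) {
  assert(A.size() == B.size() && "operands must have the same number of lanes");
  std::vector<int> Packed; // plays the role of BuildVector
  Packed.reserve(A.size());
  for (size_t Lane = 0; Lane != A.size(); ++Lane)
    Packed.push_back(ScalarOp(A[Lane], B[Lane])); // clone for lane `Lane`
  return Packed;
}

int main() {
  std::vector<int> A{1, 2, 3, 4}, B{10, 20, 30, 40};
  auto R = replicateByLanes(A, B, [](int X, int Y) { return X + Y; });
  assert(R == (std::vector<int>{11, 22, 33, 44}));
  return 0;
}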
