LLVM: lib/Transforms/Vectorize/VPlanUnroll.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

26

27using namespace llvm;

28

29namespace {

30

31

32

33

34

35class UnrollState {

36

38

39 const unsigned UF;

40

42

43

44

46

47

48

50

51

52 void unrollReplicateRegionByUF(VPRegionBlock *VPR);

53

54

55

57

58

59

60

63

64

65

68

69 VPValue *getConstantVPV(unsigned Part) {

71 return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));

72 }

73

74public:

76 : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}

77

79

81 if (Part == 0 || V->isLiveIn())

82 return V;

83 assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&

84 "accessed value does not exist");

85 return VPV2Parts[V][Part - 1];

86 }

87

88

89

90

92 unsigned Part) {

94 auto Ins = VPV2Parts.insert({VPV, {}});

95 assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");

97 }

98 }

99

100

102 auto Ins = VPV2Parts.insert({R, {}});

103 assert(Ins.second && "uniform value already added");

104 for (unsigned Part = 0; Part != UF; ++Part)

105 Ins.first->second.push_back(R);

106 }

107

109

110

111

112 void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {

113 auto *Op = R->getOperand(OpIdx);

114 R->setOperand(OpIdx, getValueForPart(Op, Part));

115 }

116

117

119 for (const auto &[OpIdx, Op] : enumerate(R->operands()))

120 R->setOperand(OpIdx, getValueForPart(Op, Part));

121 }

122};

123}

124

125void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {

127 for (unsigned Part = 1; Part != UF; ++Part) {

130

133 for (const auto &[PartIVPBB, Part0VPBB] :

134 zip(VPBlockUtils::blocksOnly(PartI),

135 VPBlockUtils::blocksOnly(Part0))) {

136 for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {

138 if (auto *ScalarIVSteps = dyn_cast(&PartIR)) {

139 ScalarIVSteps->addOperand(getConstantVPV(Part));

140 }

141

142 addRecipeForPart(&Part0R, &PartIR, Part);

143 }

144 }

145 }

146}

147

148void UnrollState::unrollWidenInductionByUF(

151 IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());

153 auto &ID = IV->getInductionDescriptor();

154 std::optional FMFs;

155 if (isa_and_present(ID.getInductionBinOp()))

156 FMFs = ID.getInductionBinOp()->getFastMathFlags();

157

162 IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;

163 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);

165 }

166

167 VPValue *ScalarStep = IV->getStepValue();

168 auto *ConstStep = ScalarStep->isLiveIn()

170 : nullptr;

171 if (!ConstStep || ConstStep->getValue() != 1) {

173 ScalarStep =

174 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);

176 }

177

178 unsigned MulOpc =

179 IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;

180 VPInstruction *Mul = Builder.createNaryOp(MulOpc, {VectorStep, ScalarStep},

181 FMFs, IV->getDebugLoc());

182 VectorStep = Mul;

184 }

185

186

187

188

189

190

191

192

193

194

195

196

197

199 Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);

200 unsigned AddOpc =

202 for (unsigned Part = 1; Part != UF; ++Part) {

203 std::string Name =

204 Part > 1 ? "step.add." + std::to_string(Part) : "step.add";

205

207 {

208 Prev,

209 VectorStep,

210 },

211 FMFs, IV->getDebugLoc(), Name);

213 addRecipeForPart(IV, Add, Part);

214 Prev = Add;

215 }

216 IV->addOperand(VectorStep);

217 IV->addOperand(Prev);

218}

219

222

223

224 if (isa(R))

225 return;

226

227

228 if (auto *IV = dyn_cast(R)) {

229 unrollWidenInductionByUF(IV, InsertPtForPhi);

230 return;

231 }

232

233 auto *RdxPhi = dyn_cast(R);

234 if (RdxPhi && RdxPhi->isOrdered())

235 return;

236

237 auto InsertPt = std::next(R->getIterator());

238 for (unsigned Part = 1; Part != UF; ++Part) {

240 Copy->insertBefore(*R->getParent(), InsertPt);

241 addRecipeForPart(R, Copy, Part);

242 if (isa(R)) {

243 Copy->addOperand(R);

244 Copy->addOperand(getConstantVPV(Part));

245 } else if (RdxPhi) {

246 Copy->addOperand(getConstantVPV(Part));

247 } else {

248 assert(isa(R) &&

249 "unexpected header phi recipe not needing unrolled part");

250 }

251 }

252}

253

254

255void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {

259 return;

260

261 if (auto *VPI = dyn_cast(&R)) {

263 addUniformForAllParts(VPI);

264 return;

265 }

266 }

267 if (auto *RepR = dyn_cast(&R)) {

268 if (isa(RepR->getUnderlyingValue()) &&

269 RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {

270

272 return;

273 }

274 if (auto *II = dyn_cast(RepR->getUnderlyingValue())) {

275 if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {

276 addUniformForAllParts(RepR);

277 return;

278 }

279 }

280 }

281

282

283 auto InsertPt = std::next(R.getIterator());

285 for (unsigned Part = 1; Part != UF; ++Part) {

287 Copy->insertBefore(VPBB, InsertPt);

288 addRecipeForPart(&R, Copy, Part);

289

291 if (match(&R, m_VPInstructionVPInstruction::FirstOrderRecurrenceSplice(

293 Copy->setOperand(0, getValueForPart(Op, Part - 1));

294 Copy->setOperand(1, getValueForPart(Op, Part));

295 continue;

296 }

297 if (auto *Red = dyn_cast(&R)) {

298 auto *Phi = cast(R.getOperand(0));

299 if (Phi->isOrdered()) {

300 auto &Parts = VPV2Parts[Phi];

301 if (Part == 1) {

303 Parts.push_back(Red);

304 }

305 Parts.push_back(Copy->getVPSingleValue());

306 Phi->setOperand(1, Copy->getVPSingleValue());

307 }

308 }

310

311

312

315 match(Copy, m_VPInstructionVPInstruction::CanonicalIVIncrementForPart(

317 Copy->addOperand(getConstantVPV(Part));

318

319 if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))

320 Copy->setOperand(0, R.getOperand(0));

321 }

322}

323

325void UnrollState::unrollBlock(VPBlockBase *VPB) {

326 auto *VPR = dyn_cast(VPB);

327 if (VPR) {

329 return unrollReplicateRegionByUF(VPR);

330

331

332

336 unrollBlock(VPB);

337 return;

338 }

339

340

341 auto *VPBB = cast(VPB);

344 if (ToSkip.contains(&R) || isa(&R))

345 continue;

346

347

348

350 if (match(&R, m_VPInstructionVPInstruction::ComputeReductionResult(

352 addUniformForAllParts(cast(&R));

353 for (unsigned Part = 1; Part != UF; ++Part)

354 R.addOperand(getValueForPart(Op1, Part));

355 continue;

356 }

358 if (match(&R, m_VPInstructionVPInstruction::ExtractFromEnd(

360 addUniformForAllParts(cast(&R));

362

363

366 R.getVPSingleValue()->replaceAllUsesWith(

367 getValueForPart(Op0, UF - Offset));

368 R.eraseFromParent();

369 } else {

370

372 }

373 continue;

374 }

375

376 auto *SingleDef = dyn_cast(&R);

378 addUniformForAllParts(SingleDef);

379 continue;

380 }

381

382 if (auto *H = dyn_cast(&R)) {

383 unrollHeaderPHIByUF(H, InsertPtForPhi);

384 continue;

385 }

386

387 unrollRecipeByUF(R);

388 }

389}

390

392 assert(UF > 0 && "Unroll factor must be positive");

396

397 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) {

399 auto *VPI = dyn_cast(&R);

400 if (VPI &&

402 VPI->getNumOperands() == 1) {

403 VPI->replaceAllUsesWith(VPI->getOperand(0));

404 VPI->eraseFromParent();

405 }

406 }

407 }

408 });

409 if (UF == 1) {

410 return;

411 }

412

413 UnrollState Unroller(Plan, UF, Ctx);

414

415

416

417

421 Unroller.unrollBlock(VPB);

422

423 unsigned Part = 1;

424

425

426

429

430

431

432 if (isa(&H)) {

433 Unroller.remapOperand(&H, 1, UF - 1);

434 continue;

435 }

436 if (Unroller.contains(H.getVPSingleValue()) ||

437 isa(&H)) {

438 Part = 1;

439 continue;

440 }

441 Unroller.remapOperands(&H, Part);

442 Part++;

443 }

444

446}

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

static const HTTPClientCleanup Cleanup

uint64_t IntrinsicInst * II

This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)

This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...

This file provides utility VPlan to VPlan transformations.

static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)

This file contains the declarations of the Vectorization Plan base classes:

static const uint32_t IV[8]

This class represents an Operation in the Expression.

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

This is an important class for using LLVM in a threaded context.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

The instances of the Type class are immutable: once they are created, they are never changed.

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.

RecipeListTy::iterator iterator

Instruction iterators...

iterator_range< iterator > phis()

Returns an iterator range over the PHI-like recipes in the block.

iterator getFirstNonPhi()

Return the position of the first non-phi node recipe in the block.

VPBlockBase is the building block of the Hierarchical Control-Flow Graph.

const VPBasicBlock * getEntryBasicBlock() const

VPBlockBase * getSingleSuccessor() const

static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)

Insert disconnected block NewBlock before Blockptr.

VPlan-based builder utility analogous to IRBuilder.

Type * getScalarType() const

Returns the scalar type of the induction.

ArrayRef< VPValue * > definedValues()

Returns an ArrayRef of the values defined by the VPDef.

VPValue * getVPValue(unsigned I)

Returns the VPValue with index I defined by the VPDef.

This is a concrete Recipe that models a single VPlan-level instruction.

@ CanonicalIVIncrementForPart

VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.

VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...

VPRegionBlock * clone() override

Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...

const VPBlockBase * getEntry() const

bool isReplicator() const

An indicator whether this region is to generate multiple replicated instances of output IR correspond...

A recipe to compute the pointers for widened memory accesses of IndexTy in reverse order.

A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...

VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...

An analysis for type-inference for VPValues.

Type * inferScalarType(const VPValue *V)

Infer the type of V. Returns the scalar type of V.

VPRecipeBase * getDefiningRecipe()

Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...

Value * getLiveInIRValue()

Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.

bool isLiveIn() const

Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.

A recipe to compute the pointers for widened memory accesses of IndexTy.

A Recipe for widening the canonical induction variable of the vector loop.

A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...

VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...

VPBasicBlock * getEntry()

VPValue & getVF()

Returns the VF of the vector loop region.

VPRegionBlock * getVectorLoopRegion()

Returns the VPRegionBlock of the vector loop.

VPValue * getOrAddLiveIn(Value *V)

Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.

bool hasScalarVFOnly() const

VPCanonicalIVPHIRecipe * getCanonicalIV()

Returns the canonical induction recipe of the vector loop.

bool match(Val *V, const Pattern &P)

BinaryVPInstruction_match< Op0_t, Op1_t, VPInstruction::BranchOnCount > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)

UnaryVPInstruction_match< Op0_t, VPInstruction::BranchOnCond > m_BranchOnCond(const Op0_t &Op0)

class_match< VPValue > m_VPValue()

Match an arbitrary VPValue and ignore it.

NodeAddr< PhiNode * > Phi

bool isUniformAcrossVFsAndUFs(VPValue *V)

Checks if V is uniform across all VF lanes and UF parts.

bool onlyFirstPartUsed(const VPValue *Def)

Returns true if only the first part of Def is used.

This is an optimization pass for GlobalISel generic memory operations.

detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)

zip iterator for two or more iteratable types.

detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in depth-first order.

iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

DWARFExpression::Operation Op

static void unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx)

Explicitly unroll Plan by UF.

static void removeDeadRecipes(VPlan &Plan)

Remove dead recipes from Plan.