LLVM: lib/Transforms/Vectorize/VPlanSLP.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

33#include

34#include

35#include

36#include

37

38using namespace llvm;

39

40#define DEBUG_TYPE "vplan-slp"

41

42

44

46 Old2NewTy &Old2New,

51 visitBlock(Base, Old2New, IAI);

52 }

53}

54

55void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,

58 for (VPRecipeBase &VPI : *VPBB) {

60 continue;

62 if (!VPInst)

63 continue;

65 if (!Inst)

66 continue;

68 if (!IG)

69 continue;

70

71 auto NewIGIter = Old2New.find(IG);

72 if (NewIGIter == Old2New.end())

73 Old2New[IG] = new InterleaveGroup(

74 IG->getFactor(), IG->isReverse(), IG->getAlign());

75

76 if (Inst == IG->getInsertPos())

77 Old2New[IG]->setInsertPos(VPInst);

78

79 InterleaveGroupMap[VPInst] = Old2New[IG];

80 InterleaveGroupMap[VPInst]->insertMember(

81 VPInst, IG->getIndex(Inst),

82 Align(IG->isReverse() ? (-1) * int(IG->getFactor())

83 : IG->getFactor()));

84 }

86 visitRegion(Region, Old2New, IAI);

87 } else {

89 }

90}

91

94 Old2NewTy Old2New;

96}

97

99

100

101 CompletelySLP = false;

102 return nullptr;

103}

104

108 })) {

109 unsigned BundleSize = 0;

110 for (VPValue *V : Operands) {

112 assert(T->isVectorTy() && "Only scalar types supported for now");

113 BundleSize += T->getScalarSizeInBits();

114 }

115 WidestBundleBits = std::max(WidestBundleBits, BundleSize);

116 }

117

118 auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);

120 "Already created a combined instruction for the operand bundle");

121 (void)Res;

122}

123

125

126 if (all\_of(Operands, [](VPValue *Op) {

129 })) {

130 LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");

131 return false;

132 }

133

134

135

136

137

140 unsigned Opcode = OriginalInstr->getOpcode();

142 if (all\_of(Operands, [Opcode, Width](VPValue *Op) {

144 return I->getOpcode() == Opcode &&

145 I->getType()->getPrimitiveSizeInBits() == Width;

146 })) {

147 LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");

148 return false;

149 }

150

151

152 if (any_of(Operands, [this](VPValue *Op) {

154 })) {

155 LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");

156 return false;

157 }

158

160 [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {

161 LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");

162 return false;

163 }

164

165

166

167

168

169 if (Opcode == Instruction::Load) {

170 unsigned LoadsSeen = 0;

174 if (!VPI)

175 return false;

176 if (VPI->getOpcode() == Instruction::Load &&

178 LoadsSeen++;

179

180 if (LoadsSeen == Operands.size())

181 break;

182 if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {

184 dbgs() << "VPSLP: instruction modifying memory between loads\n");

185 return false;

186 }

187 }

188

189 if (all\_of(Operands, [](VPValue *Op) {

191 ->isSimple();

192 })) {

193 LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");

194 return false;

195 }

196 }

197

198 if (Opcode == Instruction::Store)

199 if (all\_of(Operands, [](VPValue *Op) {

201 ->isSimple();

202 })) {

203 LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");

204 return false;

205 }

206

207 return true;

208}

209

211 unsigned OperandIndex) {

213 for (VPValue *V : Values) {

214

216 Operands.push_back(U->getOperand(OperandIndex));

217 }

218 return Operands;

219}

220

225

230

231 switch (VPI->getOpcode()) {

232 case Instruction::Load:

233 llvm_unreachable("Loads terminate a tree, no need to get operands");

234 case Instruction::Store:

235 Result.push_back(getOperands(Values, 0));

236 break;

237 default:

238 for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)

240 break;

241 }

242

243 return Result;

244}

245

246

251 }))

252 return std::nullopt;

253 return {Opcode};

254}

255

256

257

260 if (A->getOpcode() != B->getOpcode())

261 return false;

262

263 if (A->getOpcode() != Instruction::Load &&

264 A->getOpcode() != Instruction::Store)

265 return true;

268

269 return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);

270}

271

272

273

278

279 if (!I1 || !I2)

280 return 0;

281

282 if (MaxLevel == 0)

284

285 unsigned Score = 0;

286 for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)

287 for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)

288 Score +=

289 getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);

290 return Score;

291}

292

293std::pair<VPlanSlp::OpMode, VPValue *>

297 assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&

298 "Currently we only handle load and commutative opcodes");

300

304 for (auto *Candidate : Candidates) {

309 << " ");

310 BestCandidates.push_back(Candidate);

311 }

312 }

314

315 if (BestCandidates.empty())

316 return {OpMode::Failed, nullptr};

317

318 if (BestCandidates.size() == 1)

319 return {Mode, BestCandidates[0]};

320

321 VPValue *Best = nullptr;

322 unsigned BestScore = 0;

324 unsigned PrevScore = ~0u;

325 bool AllSame = true;

326

327

328 for (auto *Candidate : BestCandidates) {

330 if (PrevScore == ~0u)

331 PrevScore = Score;

332 if (PrevScore != Score)

333 AllSame = false;

334 PrevScore = Score;

335

336 if (Score > BestScore) {

337 BestScore = Score;

338 Best = Candidate;

339 }

340 }

341 if (!AllSame)

342 break;

343 }

346 << "\n");

347 Candidates.erase(Best);

348

349 return {Mode, Best};

350}

351

353 SmallVector<MultiNodeOpTy, 4> FinalOrder;

355 FinalOrder.reserve(MultiNodeOps.size());

356 Mode.reserve(MultiNodeOps.size());

357

359

360 for (auto &Operands : MultiNodeOps) {

361 FinalOrder.push_back({Operands.first, {Operands.second[0]}});

363 Instruction::Load)

364 Mode.push_back(OpMode::Load);

365 else

366 Mode.push_back(OpMode::Opcode);

367 }

368

369 for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {

370 LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");

371 SmallPtrSet<VPValue *, 4> Candidates;

373 for (auto Ops : MultiNodeOps) {

376 << " ");

377 Candidates.insert(Ops.second[Lane]);

378 }

380

381 for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {

383 if (Mode[Op] == OpMode::Failed)

384 continue;

385

386 VPValue *Last = FinalOrder[Op].second[Lane - 1];

387 std::pair<OpMode, VPValue *> Res =

388 getBest(Mode[Op], Last, Candidates, IAI);

389 if (Res.second)

390 FinalOrder[Op].second.push_back(Res.second);

391 else

392

393 FinalOrder[Op].second.push_back(markFailed());

394 }

395 }

396

397 return FinalOrder;

398}

399

400#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

402 dbgs() << " Ops: ";

403 for (auto *Op : Values) {

405 if (auto *Instr = VPInstr->getUnderlyingInstr()) {

407 continue;

408 }

409 dbgs() << " nullptr | ";

410 }

411 dbgs() << "\n";

412}

413#endif

414

416 assert(!Values.empty() && "Need some operands!");

417

418

419 auto I = BundleToCombined.find(to_vector<4>(Values));

420 if (I != BundleToCombined.end()) {

421#ifndef NDEBUG

422

423

424

425 for (auto *V : Values) {

426 auto UI = V->user_begin();

427 auto *FirstUser = *UI++;

428 while (UI != V->user_end()) {

429 assert(*UI == FirstUser && "Currently we only support SLP trees.");

430 UI++;

431 }

432 }

433#endif

434 return I->second;

435 }

436

437

439 dbgs() << "buildGraph: ";

440 dumpBundle(Values);

441 });

442

443 if (!areVectorizable(Values))

444 return markFailed();

445

446 assert(getOpcode(Values) && "Opcodes for all values must match");

447 unsigned ValuesOpcode = *getOpcode(Values);

448

451 bool MultiNodeRoot = !MultiNodeActive;

452 MultiNodeActive = true;

453 for (auto &Operands : getOperands(Values)) {

455 dbgs() << " Visiting Commutative";

456 dumpBundle(Operands);

457 });

458

459 auto OperandsOpcode = getOpcode(Operands);

460 if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {

461 LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");

463 } else {

465

466

470 MultiNodeOps.emplace_back(Op, Operands);

471 }

472 }

473

474 if (MultiNodeRoot) {

476 MultiNodeActive = false;

477

478 auto FinalOrder = reorderMultiNodeOps();

479

480 MultiNodeOps.clear();

481 for (auto &Ops : FinalOrder) {

483 Ops.first->replaceAllUsesWith(NewOp);

484 for (unsigned i = 0; i < CombinedOperands.size(); i++)

485 if (CombinedOperands[i] == Ops.first)

486 CombinedOperands[i] = NewOp;

487 delete Ops.first;

488 Ops.first = NewOp;

489 }

491 }

492 } else {

494 if (ValuesOpcode == Instruction::Load)

495 for (VPValue *V : Values)

497 else

498 for (auto &Operands : getOperands(Values))

500 }

501

502 unsigned Opcode;

503 switch (ValuesOpcode) {

504 case Instruction::Load:

506 break;

507 case Instruction::Store:

509 break;

510 default:

511 Opcode = ValuesOpcode;

512 break;

513 }

514

515 if (!CompletelySLP)

516 return markFailed();

517

518 assert(CombinedOperands.size() > 0 && "Need more some operands");

520 auto *VPI =

521 new VPInstruction(Opcode, CombinedOperands, {}, {}, Inst->getDebugLoc());

522

523 LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " " << Values[0]

524 << "\n");

525 addCombined(Values, VPI);

526 return VPI;

527}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

This file defines the DenseMap class.

const size_t AbstractManglingParser< Derived, Alloc >::NumOps

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))

This file defines the SmallVector class.

static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)

Returns the opcode of Values or ~0 if they do not all agree.

Definition VPlanSLP.cpp:247

static unsigned LookaheadMaxDepth

Definition VPlanSLP.cpp:43

static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)

Returns true if A and B access sequential memory if they are loads or stores or if they have identica...

Definition VPlanSLP.cpp:258

static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)

Implements getLAScore from Listing 7 in the paper.

Definition VPlanSLP.cpp:274

static bool areCommutative(ArrayRef< VPValue * > Values)

Definition VPlanSLP.cpp:221

static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue * > Values, unsigned OperandIndex)

Definition VPlanSLP.cpp:210

This file contains the declarations for VPlan-based SLP.

This file contains the declarations of the entities induced by Vectorization Plans,...

This file contains the declarations of the Vectorization Plan base classes:

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

LLVM_ABI bool isCommutative() const LLVM_READONLY

Return true if the instruction is commutative:

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

Drive the analysis of interleaved memory accesses in the loop.

InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const

Get the interleave group that Instr belongs to.

BlockT * getEntry() const

Get the entry BasicBlock of the Region.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

void reserve(size_type N)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY

Return the basic size of this type if it is a primitive type.

iterator getFirstNonPhi()

Return the position of the first non-phi node recipe in the block.

VPBlockBase is the building block of the Hierarchical Control-Flow Graph.

This is a concrete Recipe that models a single VPlan-level instruction.

LLVM_ABI_FOR_TEST VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)

Definition VPlanSLP.cpp:92

InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const

Get the interleave group that Instr belongs to.

VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...

This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...

LLVM_ABI_FOR_TEST VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)

Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...

Definition VPlanSLP.cpp:415

VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...

LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()

Returns the VPRegionBlock of the vector loop.

Type * getType() const

All values are typed, get the type of this value.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

NodeAddr< InstrNode * > Instr

friend class Instruction

Iterator for Instructions in a `BasicBlock.

This is an optimization pass for GlobalISel generic memory operations.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

auto cast_or_null(const Y &Val)

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)

Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

DWARFExpression::Operation Op

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.