LLVM: lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

29#include "llvm/IR/IntrinsicsHexagon.h"

41#include

42#include

43#include

44#include

45

46using namespace llvm;

47

48#define DEBUG_TYPE "hexagon-vlcr"

49

50STATISTIC(HexagonNumVectorLoopCarriedReuse,

51 "Number of values that were reused from a previous iteration.");

52

54 "hexagon-vlcr-iteration-lim", cl::Hidden,

55 cl::desc("Maximum distance of loop carried dependences that are handled"),

57

58namespace llvm {

59

62

63}

64

65namespace {

66

67

69

70 class DepChain {

71 ChainOfDependences Chain;

72

73 public:

74 bool isIdentical(DepChain &Other) const {

76 return false;

77 ChainOfDependences &OtherChain = Other.getChain();

78 for (int i = 0; i < size(); ++i) {

79 if (Chain[i] != OtherChain[i])

80 return false;

81 }

82 return true;

83 }

84

85 ChainOfDependences &getChain() {

86 return Chain;

87 }

88

89 int size() const {

90 return Chain.size();

91 }

92

93 void clear() {

94 Chain.clear();

95 }

96

98 Chain.push_back(I);

99 }

100

101 int iterations() const {

102 return size() - 1;

103 }

104

106 return Chain.front();

107 }

108

110 return Chain.back();

111 }

112

113 Instruction *&operator[](const int index) {

114 return Chain[index];

115 }

116

118 };

119

122 const ChainOfDependences &CD = D.Chain;

123 int ChainSize = CD.size();

124 OS << "**DepChain Start::**\n";

125 for (int i = 0; i < ChainSize -1; ++i) {

126 OS << *(CD[i]) << " -->\n";

127 }

128 OS << *CD[ChainSize-1] << "\n";

129 return OS;

130 }

131

132 struct ReuseValue {

134

135

136

137

139 std::map<Instruction *, DepChain *> DepChains;

140 int Iterations = -1;

141

142 ReuseValue() = default;

143

144 void reset() {

145 Inst2Replace = nullptr;

146 BackedgeInst = nullptr;

147 DepChains.clear();

148 Iterations = -1;

149 }

150 bool isDefined() { return Inst2Replace != nullptr; }

151 };

152

155 OS << "** ReuseValue ***\n";

156 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";

157 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";

158 return OS;

159 }

160

161 class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {

162 public:

163 static char ID;

164

165 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {

168 }

169

171 return "Hexagon-specific loop carried reuse for HVX vectors";

172 }

173

179 }

180

182 };

183

184 class HexagonVectorLoopCarriedReuse {

185 public:

186 HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};

187

188 bool run();

189

190 private:

192 std::set<Instruction *> ReplacedInsts;

193 Loop *CurLoop;

194 ReuseValue ReuseCandidate;

195

196 bool doVLCR();

197 void findLoopCarriedDeps();

198 void findValueToReuse();

199 void findDepChainFromPHI(Instruction *I, DepChain &D);

200 void reuseValue();

205 bool isCallInstCommutative(CallInst *C);

206 };

207

208}

209

210char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;

211

213 "Hexagon-specific predictive commoning for HVX vectors",

214 false, false)

218 "Hexagon-specific predictive commoning for HVX vectors",

220

225 HexagonVectorLoopCarriedReuse Vlcr(&L);

226 if (!Vlcr.run())

230 return PA;

231}

232

233bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,

235 if (skipLoop(L))

236 return false;

237 HexagonVectorLoopCarriedReuse Vlcr(L);

238 return Vlcr.run();

239}

240

241bool HexagonVectorLoopCarriedReuse::run() {

242 if (!CurLoop->getLoopPreheader())

243 return false;

244

245

246 if (!CurLoop->getSubLoops().empty())

247 return false;

248

249

250 if (CurLoop->getNumBlocks() != 1)

251 return false;

252

253 return doVLCR();

254}

255

256bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {

257 switch (C->getCalledFunction()->getIntrinsicID()) {

258 case Intrinsic::hexagon_V6_vaddb:

259 case Intrinsic::hexagon_V6_vaddb_128B:

260 case Intrinsic::hexagon_V6_vaddh:

261 case Intrinsic::hexagon_V6_vaddh_128B:

262 case Intrinsic::hexagon_V6_vaddw:

263 case Intrinsic::hexagon_V6_vaddw_128B:

264 case Intrinsic::hexagon_V6_vaddubh:

265 case Intrinsic::hexagon_V6_vaddubh_128B:

266 case Intrinsic::hexagon_V6_vadduhw:

267 case Intrinsic::hexagon_V6_vadduhw_128B:

268 case Intrinsic::hexagon_V6_vaddhw:

269 case Intrinsic::hexagon_V6_vaddhw_128B:

270 case Intrinsic::hexagon_V6_vmaxb:

271 case Intrinsic::hexagon_V6_vmaxb_128B:

272 case Intrinsic::hexagon_V6_vmaxh:

273 case Intrinsic::hexagon_V6_vmaxh_128B:

274 case Intrinsic::hexagon_V6_vmaxw:

275 case Intrinsic::hexagon_V6_vmaxw_128B:

276 case Intrinsic::hexagon_V6_vmaxub:

277 case Intrinsic::hexagon_V6_vmaxub_128B:

278 case Intrinsic::hexagon_V6_vmaxuh:

279 case Intrinsic::hexagon_V6_vmaxuh_128B:

280 case Intrinsic::hexagon_V6_vminub:

281 case Intrinsic::hexagon_V6_vminub_128B:

282 case Intrinsic::hexagon_V6_vminuh:

283 case Intrinsic::hexagon_V6_vminuh_128B:

284 case Intrinsic::hexagon_V6_vminb:

285 case Intrinsic::hexagon_V6_vminb_128B:

286 case Intrinsic::hexagon_V6_vminh:

287 case Intrinsic::hexagon_V6_vminh_128B:

288 case Intrinsic::hexagon_V6_vminw:

289 case Intrinsic::hexagon_V6_vminw_128B:

290 case Intrinsic::hexagon_V6_vmpyub:

291 case Intrinsic::hexagon_V6_vmpyub_128B:

292 case Intrinsic::hexagon_V6_vmpyuh:

293 case Intrinsic::hexagon_V6_vmpyuh_128B:

294 case Intrinsic::hexagon_V6_vavgub:

295 case Intrinsic::hexagon_V6_vavgub_128B:

296 case Intrinsic::hexagon_V6_vavgh:

297 case Intrinsic::hexagon_V6_vavgh_128B:

298 case Intrinsic::hexagon_V6_vavguh:

299 case Intrinsic::hexagon_V6_vavguh_128B:

300 case Intrinsic::hexagon_V6_vavgw:

301 case Intrinsic::hexagon_V6_vavgw_128B:

302 case Intrinsic::hexagon_V6_vavgb:

303 case Intrinsic::hexagon_V6_vavgb_128B:

304 case Intrinsic::hexagon_V6_vavguw:

305 case Intrinsic::hexagon_V6_vavguw_128B:

306 case Intrinsic::hexagon_V6_vabsdiffh:

307 case Intrinsic::hexagon_V6_vabsdiffh_128B:

308 case Intrinsic::hexagon_V6_vabsdiffub:

309 case Intrinsic::hexagon_V6_vabsdiffub_128B:

310 case Intrinsic::hexagon_V6_vabsdiffuh:

311 case Intrinsic::hexagon_V6_vabsdiffuh_128B:

312 case Intrinsic::hexagon_V6_vabsdiffw:

313 case Intrinsic::hexagon_V6_vabsdiffw_128B:

314 return true;

315 default:

316 return false;

317 }

318}

319

320bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,

322 if (!I1->isSameOperationAs(I2))

323 return false;

324

325

326

327

328 if (CallInst *C1 = dyn_cast(I1)) {

329 if (CallInst *C2 = dyn_cast(I2)) {

330 if (C1->getCalledFunction() != C2->getCalledFunction())

331 return false;

332 }

333 }

334

335

336

338 unsigned NumOperands = I1->getNumOperands();

339 for (unsigned i = 0; i < NumOperands; ++i) {

340 ConstantInt *C1 = dyn_cast(I1->getOperand(i));

342 if(!C1) continue;

345 return false;

346 }

347 }

348

349 return true;

350}

351

352bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {

354 if (II)

355 return true;

356

357 switch (II->getIntrinsicID()) {

358 case Intrinsic::hexagon_V6_hi:

359 case Intrinsic::hexagon_V6_lo:

360 case Intrinsic::hexagon_V6_hi_128B:

361 case Intrinsic::hexagon_V6_lo_128B:

362 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");

363 return false;

364 default:

365 return true;

366 }

367}

368void HexagonVectorLoopCarriedReuse::findValueToReuse() {

369 for (auto *D : Dependences) {

370 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");

374 << ".. Skipping because number of iterations > than the limit\n");

375 continue;

376 }

377

378 PHINode *PN = cast(D->front());

380 int Iters = D->iterations();

382 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN

383 << " can be reused\n");

384

386 for (Use &U : PN->uses()) {

388

389 if (User->getParent() != BB)

390 continue;

391 if (ReplacedInsts.count(User)) {

393 << " has already been replaced. Skipping...\n");

394 continue;

395 }

396 if (isa(User))

397 continue;

398 if (User->mayHaveSideEffects())

399 continue;

400 if (!canReplace(User))

401 continue;

402

404 }

405 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");

406

407

408

409

410

411

412

414 for (Use &U : BEInst->uses()) {

415 Instruction *BEUser = cast(U.getUser());

416

418 continue;

419 if (!isEquivalentOperation(I, BEUser))

420 continue;

421

422 int NumOperands = I->getNumOperands();

423

424

425

426

427

428

429

430

431

432

433 std::map<Instruction *, DepChain *> DepChains;

434 CallInst *C1 = dyn_cast(I);

435 if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {

436 bool Found = false;

437 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {

438 Value *Op = I->getOperand(OpNo);

440 Found = false;

441 for (int T = 0; T < NumOperands; ++T) {

443 Instruction *BEOpInst = dyn_cast(BEOp);

444 if (!OpInst && !BEOpInst) {

445 if (Op == BEOp) {

446 Found = true;

447 break;

448 }

449 }

450

451 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))

452 continue;

453

454 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);

455

456 if (D) {

457 Found = true;

458 DepChains[OpInst] = D;

459 break;

460 }

461 }

462 if (!Found) {

463 BEUser = nullptr;

464 break;

465 }

466 }

467 } else {

468

469 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {

470 Value *Op = I->getOperand(OpNo);

472

474 if (!OpInst) {

475 if (Op == BEOp)

476 continue;

477

478

479 BEUser = nullptr;

480 break;

481 }

482

483 Instruction *BEOpInst = dyn_cast(BEOp);

484 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);

485

486 if (D) {

487 DepChains[OpInst] = D;

488 } else {

489 BEUser = nullptr;

490 break;

491 }

492 }

493 }

494 if (BEUser) {

496 ReuseCandidate.Inst2Replace = I;

497 ReuseCandidate.BackedgeInst = BEUser;

498 ReuseCandidate.DepChains = DepChains;

499 ReuseCandidate.Iterations = Iters;

500 return;

501 }

502 ReuseCandidate.reset();

503 }

504 }

505 }

506 ReuseCandidate.reset();

507}

508

509Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,

511 PHINode *PN = dyn_cast(Op);

514 return ValueInBlock;

515}

516

517void HexagonVectorLoopCarriedReuse::reuseValue() {

519 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;

520 Instruction *BEInst = ReuseCandidate.BackedgeInst;

522 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;

523 int Iterations = ReuseCandidate.Iterations;

524 BasicBlock *LoopPH = CurLoop->getLoopPreheader();

525 assert(!DepChains.empty() && "No DepChains");

526 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");

527

529 for (int i = 0; i < Iterations; ++i) {

532 for (int j = 0; j < NumOperands; ++j) {

534 if (!I)

535 continue;

536

537 DepChain &D = *DepChains[I];

538

539

540

541 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);

542 InstInPreheader->setOperand(j, ValInPreheader);

543 }

544 InstsInPreheader.push_back(InstInPreheader);

545 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");

547 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "

548 << LoopPH->getName() << "\n");

549 }

553 Value *BEVal = BEInst;

555 for (int i = Iterations-1; i >=0 ; --i) {

556 Instruction *InstInPreheader = InstsInPreheader[i];

557 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);

558 NewPhi->addIncoming(InstInPreheader, LoopPH);

561 << "\n");

562 BEVal = NewPhi;

563 }

564

565

567 ReplacedInsts.insert(Inst2Replace);

568 ++HexagonNumVectorLoopCarriedReuse;

569}

570

571bool HexagonVectorLoopCarriedReuse::doVLCR() {

572 assert(CurLoop->getSubLoops().empty() &&

573 "Can do VLCR on the innermost loop only");

574 assert((CurLoop->getNumBlocks() == 1) &&

575 "Can do VLCR only on single block loops");

576

577 bool Changed = false;

579

580 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");

581 do {

582

583 Dependences.clear();

585

586 findLoopCarriedDeps();

587 findValueToReuse();

588 if (ReuseCandidate.isDefined()) {

589 reuseValue();

590 Changed = true;

592 }

593 llvm::for_each(Dependences, std::default_delete<DepChain>());

595 return Changed;

596}

597

598void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,

599 DepChain &D) {

600 PHINode *PN = dyn_cast(I);

601 if (!PN) {

602 D.push_back(I);

603 return;

604 } else {

606 if (NumIncomingValues != 2) {

607 D.clear();

608 return;

609 }

610

612 if (BB != CurLoop->getHeader()) {

613 D.clear();

614 return;

615 }

616

618 Instruction *BEInst = dyn_cast(BEVal);

619

620

621 assert(BEInst && "There should be a value over the backedge");

622

623 Value *PreHdrVal =

625 if(!PreHdrVal || !isa(PreHdrVal)) {

626 D.clear();

627 return;

628 }

629 D.push_back(PN);

630 findDepChainFromPHI(BEInst, D);

631 }

632}

633

634DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,

636 int Iters) {

637 for (auto *D : Dependences) {

638 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)

639 return D;

640 }

641 return nullptr;

642}

643

644void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {

645 BasicBlock *BB = CurLoop->getHeader();

646 for (auto I = BB->begin(), E = BB->end(); I != E && isa(I); ++I) {

647 auto *PN = cast(I);

648 if (!isa(PN->getType()))

649 continue;

650

651 DepChain *D = new DepChain();

652 findDepChainFromPHI(PN, *D);

653 if (D->size() != 0)

654 Dependences.insert(D);

655 else

656 delete D;

657 }

658 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");

659 LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);

660}

661

663 return new HexagonVectorLoopCarriedReuseLegacyPass();

664}

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

#define LLVM_ATTRIBUTE_UNUSED

std::optional< std::vector< StOtherPiece > > Other

static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))

hexagon Hexagon specific predictive commoning for HVX vectors

This defines the Use class.

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

A container for analyses that lazily runs them and caches their results.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequiredID(const void *ID)

AnalysisUsage & addPreservedID(const void *ID)

void setPreservesCFG()

This function should be called by the pass, iff they do not:

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

InstListType::const_iterator getFirstNonPHIIt() const

Iterator returning form of getFirstNonPHI.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Represents analyses that only rely on functions' control flow.

This class represents a function call, abstracting a target machine's calling convention.

This is the shared class of boolean and integer constants.

int64_t getSExtValue() const

Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...

This class represents an Operation in the Expression.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

Instruction * clone() const

Create a copy of 'this' instruction that is identical in all ways except the following:

void insertBefore(Instruction *InsertPos)

Insert an unlinked instruction into a basic block immediately before the specified instruction.

A wrapper class for inspecting calls to intrinsic functions.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0

Represents a single loop in the control flow graph.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

Value * getIncomingValueForBlock(const BasicBlock *BB) const

unsigned getNumIncomingValues() const

Return the number of incoming edges.

PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

virtual void getAnalysisUsage(AnalysisUsage &) const

getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

void preserveSet()

Mark an analysis set as preserved.

A vector that has set insertion semantics.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

bool isVectorTy() const

True if this is an instance of VectorType.

A Use represents the edge between a Value definition and its users.

void setOperand(unsigned i, Value *Val)

Value * getOperand(unsigned i) const

unsigned getNumOperands() const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

void setName(const Twine &Name)

Change the name of the value.

void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< use_iterator > uses()

StringRef getName() const

Return a constant reference to the value's name.

const ParentTy * getParent() const

This class implements an extremely fast bulk output stream that can only output to a stream.

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

This is an optimization pass for GlobalISel generic memory operations.

UnaryFunction for_each(R &&Range, UnaryFunction F)

Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

Pass * createHexagonVectorLoopCarriedReuseLegacyPass()

Hexagon Vector Loop Carried Reuse Pass.

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...