LLVM: lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

30#include "llvm/IR/IntrinsicsHexagon.h"

42#include

43#include

44#include

45#include

46

47using namespace llvm;

48

49#define DEBUG_TYPE "hexagon-vlcr"

50

51STATISTIC(HexagonNumVectorLoopCarriedReuse,

52 "Number of values that were reused from a previous iteration.");

53

55 "hexagon-vlcr-iteration-lim", cl::Hidden,

56 cl::desc("Maximum distance of loop carried dependences that are handled"),

58

59namespace {

60

61

63

64 class DepChain {

65 ChainOfDependences Chain;

66

67 public:

68 bool isIdentical(DepChain &Other) const {

70 return false;

71 ChainOfDependences &OtherChain = Other.getChain();

72 for (int i = 0; i < size(); ++i) {

73 if (Chain[i] != OtherChain[i])

74 return false;

75 }

76 return true;

77 }

78

79 ChainOfDependences &getChain() {

80 return Chain;

81 }

82

83 int size() const {

84 return Chain.size();

85 }

86

87 void clear() {

88 Chain.clear();

89 }

90

91 void push_back(Instruction *I) {

92 Chain.push_back(I);

93 }

94

95 int iterations() const {

96 return size() - 1;

97 }

98

100 return Chain.front();

101 }

102

104 return Chain.back();

105 }

106

107 Instruction *&operator[](const int index) {

108 return Chain[index];

109 }

110

111 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);

112 };

113

114 [[maybe_unused]]

116 const ChainOfDependences &CD = D.Chain;

117 int ChainSize = CD.size();

118 OS << "**DepChain Start::**\n";

119 for (int i = 0; i < ChainSize -1; ++i) {

120 OS << *(CD[i]) << " -->\n";

121 }

122 OS << *CD[ChainSize-1] << "\n";

123 return OS;

124 }

125

126 struct ReuseValue {

128

129

130

131

133 std::map<Instruction *, DepChain *> DepChains;

134 int Iterations = -1;

135

136 ReuseValue() = default;

137

138 void reset() {

139 Inst2Replace = nullptr;

140 BackedgeInst = nullptr;

141 DepChains.clear();

142 Iterations = -1;

143 }

144 bool isDefined() { return Inst2Replace != nullptr; }

145 };

146

147 [[maybe_unused]]

149 OS << "** ReuseValue ***\n";

150 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";

151 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";

152 return OS;

153 }

154

155 class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {

156 public:

157 static char ID;

158

159 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {}

160

161 StringRef getPassName() const override {

162 return "Hexagon-specific loop carried reuse for HVX vectors";

163 }

164

165 void getAnalysisUsage(AnalysisUsage &AU) const override {

170 }

171

172 bool runOnLoop(Loop *L, LPPassManager &LPM) override;

173 };

174

175 class HexagonVectorLoopCarriedReuse {

176 public:

177 HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};

178

179 bool run();

180

181 private:

182 SetVector<DepChain *> Dependences;

183 std::set<Instruction *> ReplacedInsts;

184 Loop *CurLoop;

185 ReuseValue ReuseCandidate;

186

187 bool doVLCR();

188 void findLoopCarriedDeps();

189 void findValueToReuse();

190 void findDepChainFromPHI(Instruction *I, DepChain &D);

191 void reuseValue();

192 Value *findValueInBlock(Value *Op, BasicBlock *BB);

193 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);

194 bool isEquivalentOperation(Instruction *I1, Instruction *I2);

195 bool canReplace(Instruction *I);

196 bool isCallInstCommutative(CallInst *C);

197 };

198

199}

200

201char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;

202

204 "Hexagon-specific predictive commoning for HVX vectors",

205 false, false)

209 "Hexagon-specific predictive commoning for HVX vectors",

211

216 HexagonVectorLoopCarriedReuse Vlcr(&L);

217 if (!Vlcr.run())

221 return PA;

222}

223

224bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,

226 if (skipLoop(L))

227 return false;

228 HexagonVectorLoopCarriedReuse Vlcr(L);

229 return Vlcr.run();

230}

231

232bool HexagonVectorLoopCarriedReuse::run() {

234 return false;

235

236

238 return false;

239

240

242 return false;

243

244 return doVLCR();

245}

246

247bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {

248 switch (C->getCalledFunction()->getIntrinsicID()) {

249 case Intrinsic::hexagon_V6_vaddb:

250 case Intrinsic::hexagon_V6_vaddb_128B:

251 case Intrinsic::hexagon_V6_vaddh:

252 case Intrinsic::hexagon_V6_vaddh_128B:

253 case Intrinsic::hexagon_V6_vaddw:

254 case Intrinsic::hexagon_V6_vaddw_128B:

255 case Intrinsic::hexagon_V6_vaddubh:

256 case Intrinsic::hexagon_V6_vaddubh_128B:

257 case Intrinsic::hexagon_V6_vadduhw:

258 case Intrinsic::hexagon_V6_vadduhw_128B:

259 case Intrinsic::hexagon_V6_vaddhw:

260 case Intrinsic::hexagon_V6_vaddhw_128B:

261 case Intrinsic::hexagon_V6_vmaxb:

262 case Intrinsic::hexagon_V6_vmaxb_128B:

263 case Intrinsic::hexagon_V6_vmaxh:

264 case Intrinsic::hexagon_V6_vmaxh_128B:

265 case Intrinsic::hexagon_V6_vmaxw:

266 case Intrinsic::hexagon_V6_vmaxw_128B:

267 case Intrinsic::hexagon_V6_vmaxub:

268 case Intrinsic::hexagon_V6_vmaxub_128B:

269 case Intrinsic::hexagon_V6_vmaxuh:

270 case Intrinsic::hexagon_V6_vmaxuh_128B:

271 case Intrinsic::hexagon_V6_vminub:

272 case Intrinsic::hexagon_V6_vminub_128B:

273 case Intrinsic::hexagon_V6_vminuh:

274 case Intrinsic::hexagon_V6_vminuh_128B:

275 case Intrinsic::hexagon_V6_vminb:

276 case Intrinsic::hexagon_V6_vminb_128B:

277 case Intrinsic::hexagon_V6_vminh:

278 case Intrinsic::hexagon_V6_vminh_128B:

279 case Intrinsic::hexagon_V6_vminw:

280 case Intrinsic::hexagon_V6_vminw_128B:

281 case Intrinsic::hexagon_V6_vmpyub:

282 case Intrinsic::hexagon_V6_vmpyub_128B:

283 case Intrinsic::hexagon_V6_vmpyuh:

284 case Intrinsic::hexagon_V6_vmpyuh_128B:

285 case Intrinsic::hexagon_V6_vavgub:

286 case Intrinsic::hexagon_V6_vavgub_128B:

287 case Intrinsic::hexagon_V6_vavgh:

288 case Intrinsic::hexagon_V6_vavgh_128B:

289 case Intrinsic::hexagon_V6_vavguh:

290 case Intrinsic::hexagon_V6_vavguh_128B:

291 case Intrinsic::hexagon_V6_vavgw:

292 case Intrinsic::hexagon_V6_vavgw_128B:

293 case Intrinsic::hexagon_V6_vavgb:

294 case Intrinsic::hexagon_V6_vavgb_128B:

295 case Intrinsic::hexagon_V6_vavguw:

296 case Intrinsic::hexagon_V6_vavguw_128B:

297 case Intrinsic::hexagon_V6_vabsdiffh:

298 case Intrinsic::hexagon_V6_vabsdiffh_128B:

299 case Intrinsic::hexagon_V6_vabsdiffub:

300 case Intrinsic::hexagon_V6_vabsdiffub_128B:

301 case Intrinsic::hexagon_V6_vabsdiffuh:

302 case Intrinsic::hexagon_V6_vabsdiffuh_128B:

303 case Intrinsic::hexagon_V6_vabsdiffw:

304 case Intrinsic::hexagon_V6_vabsdiffw_128B:

305 return true;

306 default:

307 return false;

308 }

309}

310

311bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,

312 Instruction *I2) {

313 if (I1->isSameOperationAs(I2))

314 return false;

315

316

317

318

321 if (C1->getCalledFunction() != C2->getCalledFunction())

322 return false;

323 }

324 }

325

326

327

329 unsigned NumOperands = I1->getNumOperands();

330 for (unsigned i = 0; i < NumOperands; ++i) {

333 if(!C1) continue;

336 return false;

337 }

338 }

339

340 return true;

341}

342

343bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {

345 if (II)

346 return true;

347

348 switch (II->getIntrinsicID()) {

349 case Intrinsic::hexagon_V6_hi:

350 case Intrinsic::hexagon_V6_lo:

351 case Intrinsic::hexagon_V6_hi_128B:

352 case Intrinsic::hexagon_V6_lo_128B:

353 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");

354 return false;

355 default:

356 return true;

357 }

358}

359void HexagonVectorLoopCarriedReuse::findValueToReuse() {

360 for (auto *D : Dependences) {

361 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");

365 << ".. Skipping because number of iterations > than the limit\n");

366 continue;

367 }

368

371 int Iters = D->iterations();

373 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN

374 << " can be reused\n");

375

376 SmallVector<Instruction *, 4> PNUsers;

377 for (Use &U : PN->uses()) {

379

380 if (User->getParent() != BB)

381 continue;

382 if (ReplacedInsts.count(User)) {

384 << " has already been replaced. Skipping...\n");

385 continue;

386 }

388 continue;

389 if (User->mayHaveSideEffects())

390 continue;

391 if (!canReplace(User))

392 continue;

393

395 }

396 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");

397

398

399

400

401

402

403

404 for (Instruction *I : PNUsers) {

405 for (Use &U : BEInst->uses()) {

407

409 continue;

410 if (!isEquivalentOperation(I, BEUser))

411 continue;

412

413 int NumOperands = I->getNumOperands();

414

415

416

417

418

419

420

421

422

423

424 std::map<Instruction *, DepChain *> DepChains;

426 if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {

427 bool Found = false;

428 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {

429 Value *Op = I->getOperand(OpNo);

431 Found = false;

432 for (int T = 0; T < NumOperands; ++T) {

435 if (!OpInst && !BEOpInst) {

436 if (Op == BEOp) {

437 Found = true;

438 break;

439 }

440 }

441

442 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))

443 continue;

444

445 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);

446

447 if (D) {

448 Found = true;

449 DepChains[OpInst] = D;

450 break;

451 }

452 }

453 if (!Found) {

454 BEUser = nullptr;

455 break;

456 }

457 }

458 } else {

459

460 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {

461 Value *Op = I->getOperand(OpNo);

463

465 if (!OpInst) {

466 if (Op == BEOp)

467 continue;

468

469

470 BEUser = nullptr;

471 break;

472 }

473

475 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);

476

477 if (D) {

478 DepChains[OpInst] = D;

479 } else {

480 BEUser = nullptr;

481 break;

482 }

483 }

484 }

485 if (BEUser) {

487 ReuseCandidate.Inst2Replace = I;

488 ReuseCandidate.BackedgeInst = BEUser;

489 ReuseCandidate.DepChains = DepChains;

490 ReuseCandidate.Iterations = Iters;

491 return;

492 }

493 ReuseCandidate.reset();

494 }

495 }

496 }

497 ReuseCandidate.reset();

498}

499

500Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,

501 BasicBlock *BB) {

505 return ValueInBlock;

506}

507

508void HexagonVectorLoopCarriedReuse::reuseValue() {

510 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;

511 Instruction *BEInst = ReuseCandidate.BackedgeInst;

513 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;

514 int Iterations = ReuseCandidate.Iterations;

516 assert(!DepChains.empty() && "No DepChains");

517 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");

518

519 SmallVector<Instruction *, 4> InstsInPreheader;

520 for (int i = 0; i < Iterations; ++i) {

522 for (int j = 0; j < NumOperands; ++j) {

524 if (I)

525 continue;

526

527 DepChain &D = *DepChains[I];

528

529

530

531 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);

532 InstInPreheader->setOperand(j, ValInPreheader);

533 }

534 InstsInPreheader.push_back(InstInPreheader);

535 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");

537 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "

538 << LoopPH->getName() << "\n");

539 }

543 Value *BEVal = BEInst;

544 PHINode *NewPhi;

545 for (int i = Iterations-1; i >=0 ; --i) {

546 Instruction *InstInPreheader = InstsInPreheader[i];

547 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);

548 NewPhi->addIncoming(InstInPreheader, LoopPH);

551 << "\n");

552 BEVal = NewPhi;

553 }

554

555

557 ReplacedInsts.insert(Inst2Replace);

558 ++HexagonNumVectorLoopCarriedReuse;

559}

560

561bool HexagonVectorLoopCarriedReuse::doVLCR() {

563 "Can do VLCR on the innermost loop only");

565 "Can do VLCR only on single block loops");

566

569

571 do {

572

573 Dependences.clear();

575

576 findLoopCarriedDeps();

577 findValueToReuse();

578 if (ReuseCandidate.isDefined()) {

579 reuseValue();

582 }

583 llvm::for_each(Dependences, std::default_delete());

586}

587

588void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,

589 DepChain &D) {

591 if (!PN) {

592 D.push_back(I);

593 return;

594 } else {

596 if (NumIncomingValues != 2) {

597 D.clear();

598 return;

599 }

600

602 if (BB != CurLoop->getHeader()) {

603 D.clear();

604 return;

605 }

606

609

610

611 assert(BEInst && "There should be a value over the backedge");

612

613 Value *PreHdrVal =

616 D.clear();

617 return;

618 }

619 D.push_back(PN);

620 findDepChainFromPHI(BEInst, D);

621 }

622}

623

624DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,

625 Instruction *I2,

626 int Iters) {

627 for (auto *D : Dependences) {

628 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)

629 return D;

630 }

631 return nullptr;

632}

633

634void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {

639 continue;

640

641 DepChain *D = new DepChain();

642 findDepChainFromPHI(PN, *D);

643 if (D->size() != 0)

644 Dependences.insert(D);

645 else

646 delete D;

647 }

648 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");

649 LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);

650}

651

653 return new HexagonVectorLoopCarriedReuseLegacyPass();

654}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))

This defines the Use class.

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)

AnalysisUsage & addPreservedID(const void *ID)

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

iterator begin()

Instruction iterator methods.

LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const

Returns an iterator to the first instruction in this block that is not a PHINode instruction.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Represents analyses that only rely on functions' control flow.

int64_t getSExtValue() const

Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...

LLVM_ABI Instruction * clone() const

Create a copy of 'this' instruction that is identical in all ways except the following:

LLVM_ABI void insertBefore(InstListType::iterator InsertPos)

Insert an unlinked instruction into a basic block immediately before the specified position.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

unsigned getNumBlocks() const

Get the number of blocks in this loop in constant time.

const std::vector< LoopT * > & getSubLoops() const

Return the loops contained entirely within this loop.

BlockT * getHeader() const

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

Represents a single loop in the control flow graph.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

Value * getIncomingValueForBlock(const BasicBlock *BB) const

unsigned getNumIncomingValues() const

Return the number of incoming edges.

Pass interface - Implemented by all 'passes'.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserveSet()

Mark an analysis set as preserved.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

bool isVectorTy() const

True if this is an instance of VectorType.

void setOperand(unsigned i, Value *Val)

Value * getOperand(unsigned i) const

unsigned getNumOperands() const

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void setName(const Twine &Name)

Change the name of the value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< use_iterator > uses()

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

const ParentTy * getParent() const

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

@ User

could "use" a pointer

friend class Instruction

Iterator for Instructions in a `BasicBlock.

LLVM_ABI Instruction & back() const

LLVM_ABI Instruction & front() const

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

UnaryFunction for_each(R &&Range, UnaryFunction F)

Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI char & LoopSimplifyID

AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager

The loop analysis manager.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

DWARFExpression::Operation Op

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

Pass * createHexagonVectorLoopCarriedReuseLegacyPass()

Definition HexagonVectorLoopCarriedReuse.cpp:652

PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

Run pass over the Loop.

Definition HexagonVectorLoopCarriedReuse.cpp:213

HexagonVectorLoopCarriedReusePass()=default

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...