LLVM: lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
30#include "llvm/IR/IntrinsicsHexagon.h"
42#include
43#include
44#include
45#include
46
47using namespace llvm;
48
49#define DEBUG_TYPE "hexagon-vlcr"
50
51STATISTIC(HexagonNumVectorLoopCarriedReuse,
52 "Number of values that were reused from a previous iteration.");
53
55 "hexagon-vlcr-iteration-lim", cl::Hidden,
56 cl::desc("Maximum distance of loop carried dependences that are handled"),
58
59namespace {
60
61
63
64 class DepChain {
65 ChainOfDependences Chain;
66
67 public:
68 bool isIdentical(DepChain &Other) const {
70 return false;
71 ChainOfDependences &OtherChain = Other.getChain();
72 for (int i = 0; i < size(); ++i) {
73 if (Chain[i] != OtherChain[i])
74 return false;
75 }
76 return true;
77 }
78
79 ChainOfDependences &getChain() {
80 return Chain;
81 }
82
83 int size() const {
84 return Chain.size();
85 }
86
87 void clear() {
88 Chain.clear();
89 }
90
91 void push_back(Instruction *I) {
92 Chain.push_back(I);
93 }
94
95 int iterations() const {
96 return size() - 1;
97 }
98
100 return Chain.front();
101 }
102
104 return Chain.back();
105 }
106
107 Instruction *&operator[](const int index) {
108 return Chain[index];
109 }
110
111 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
112 };
113
114 [[maybe_unused]]
116 const ChainOfDependences &CD = D.Chain;
117 int ChainSize = CD.size();
118 OS << "**DepChain Start::**\n";
119 for (int i = 0; i < ChainSize -1; ++i) {
120 OS << *(CD[i]) << " -->\n";
121 }
122 OS << *CD[ChainSize-1] << "\n";
123 return OS;
124 }
125
126 struct ReuseValue {
128
129
130
131
133 std::map<Instruction *, DepChain *> DepChains;
134 int Iterations = -1;
135
136 ReuseValue() = default;
137
138 void reset() {
139 Inst2Replace = nullptr;
140 BackedgeInst = nullptr;
141 DepChains.clear();
142 Iterations = -1;
143 }
144 bool isDefined() { return Inst2Replace != nullptr; }
145 };
146
147 [[maybe_unused]]
149 OS << "** ReuseValue ***\n";
150 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
151 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
152 return OS;
153 }
154
155 class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
156 public:
157 static char ID;
158
159 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {}
160
161 StringRef getPassName() const override {
162 return "Hexagon-specific loop carried reuse for HVX vectors";
163 }
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
170 }
171
172 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
173 };
174
175 class HexagonVectorLoopCarriedReuse {
176 public:
177 HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};
178
179 bool run();
180
181 private:
182 SetVector<DepChain *> Dependences;
183 std::set<Instruction *> ReplacedInsts;
184 Loop *CurLoop;
185 ReuseValue ReuseCandidate;
186
187 bool doVLCR();
188 void findLoopCarriedDeps();
189 void findValueToReuse();
190 void findDepChainFromPHI(Instruction *I, DepChain &D);
191 void reuseValue();
192 Value *findValueInBlock(Value *Op, BasicBlock *BB);
193 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
194 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
195 bool canReplace(Instruction *I);
196 bool isCallInstCommutative(CallInst *C);
197 };
198
199}
200
201char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
202
204 "Hexagon-specific predictive commoning for HVX vectors",
205 false, false)
209 "Hexagon-specific predictive commoning for HVX vectors",
211
216 HexagonVectorLoopCarriedReuse Vlcr(&L);
217 if (!Vlcr.run())
221 return PA;
222}
223
224bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
226 if (skipLoop(L))
227 return false;
228 HexagonVectorLoopCarriedReuse Vlcr(L);
229 return Vlcr.run();
230}
231
232bool HexagonVectorLoopCarriedReuse::run() {
234 return false;
235
236
238 return false;
239
240
242 return false;
243
244 return doVLCR();
245}
246
247bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
248 switch (C->getCalledFunction()->getIntrinsicID()) {
249 case Intrinsic::hexagon_V6_vaddb:
250 case Intrinsic::hexagon_V6_vaddb_128B:
251 case Intrinsic::hexagon_V6_vaddh:
252 case Intrinsic::hexagon_V6_vaddh_128B:
253 case Intrinsic::hexagon_V6_vaddw:
254 case Intrinsic::hexagon_V6_vaddw_128B:
255 case Intrinsic::hexagon_V6_vaddubh:
256 case Intrinsic::hexagon_V6_vaddubh_128B:
257 case Intrinsic::hexagon_V6_vadduhw:
258 case Intrinsic::hexagon_V6_vadduhw_128B:
259 case Intrinsic::hexagon_V6_vaddhw:
260 case Intrinsic::hexagon_V6_vaddhw_128B:
261 case Intrinsic::hexagon_V6_vmaxb:
262 case Intrinsic::hexagon_V6_vmaxb_128B:
263 case Intrinsic::hexagon_V6_vmaxh:
264 case Intrinsic::hexagon_V6_vmaxh_128B:
265 case Intrinsic::hexagon_V6_vmaxw:
266 case Intrinsic::hexagon_V6_vmaxw_128B:
267 case Intrinsic::hexagon_V6_vmaxub:
268 case Intrinsic::hexagon_V6_vmaxub_128B:
269 case Intrinsic::hexagon_V6_vmaxuh:
270 case Intrinsic::hexagon_V6_vmaxuh_128B:
271 case Intrinsic::hexagon_V6_vminub:
272 case Intrinsic::hexagon_V6_vminub_128B:
273 case Intrinsic::hexagon_V6_vminuh:
274 case Intrinsic::hexagon_V6_vminuh_128B:
275 case Intrinsic::hexagon_V6_vminb:
276 case Intrinsic::hexagon_V6_vminb_128B:
277 case Intrinsic::hexagon_V6_vminh:
278 case Intrinsic::hexagon_V6_vminh_128B:
279 case Intrinsic::hexagon_V6_vminw:
280 case Intrinsic::hexagon_V6_vminw_128B:
281 case Intrinsic::hexagon_V6_vmpyub:
282 case Intrinsic::hexagon_V6_vmpyub_128B:
283 case Intrinsic::hexagon_V6_vmpyuh:
284 case Intrinsic::hexagon_V6_vmpyuh_128B:
285 case Intrinsic::hexagon_V6_vavgub:
286 case Intrinsic::hexagon_V6_vavgub_128B:
287 case Intrinsic::hexagon_V6_vavgh:
288 case Intrinsic::hexagon_V6_vavgh_128B:
289 case Intrinsic::hexagon_V6_vavguh:
290 case Intrinsic::hexagon_V6_vavguh_128B:
291 case Intrinsic::hexagon_V6_vavgw:
292 case Intrinsic::hexagon_V6_vavgw_128B:
293 case Intrinsic::hexagon_V6_vavgb:
294 case Intrinsic::hexagon_V6_vavgb_128B:
295 case Intrinsic::hexagon_V6_vavguw:
296 case Intrinsic::hexagon_V6_vavguw_128B:
297 case Intrinsic::hexagon_V6_vabsdiffh:
298 case Intrinsic::hexagon_V6_vabsdiffh_128B:
299 case Intrinsic::hexagon_V6_vabsdiffub:
300 case Intrinsic::hexagon_V6_vabsdiffub_128B:
301 case Intrinsic::hexagon_V6_vabsdiffuh:
302 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
303 case Intrinsic::hexagon_V6_vabsdiffw:
304 case Intrinsic::hexagon_V6_vabsdiffw_128B:
305 return true;
306 default:
307 return false;
308 }
309}
310
311bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
312 Instruction *I2) {
313 if (->isSameOperationAs(I2))
314 return false;
315
316
317
318
321 if (C1->getCalledFunction() != C2->getCalledFunction())
322 return false;
323 }
324 }
325
326
327
329 unsigned NumOperands = I1->getNumOperands();
330 for (unsigned i = 0; i < NumOperands; ++i) {
333 if(!C1) continue;
336 return false;
337 }
338 }
339
340 return true;
341}
342
343bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
345 if ()
346 return true;
347
348 switch (II->getIntrinsicID()) {
349 case Intrinsic::hexagon_V6_hi:
350 case Intrinsic::hexagon_V6_lo:
351 case Intrinsic::hexagon_V6_hi_128B:
352 case Intrinsic::hexagon_V6_lo_128B:
353 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
354 return false;
355 default:
356 return true;
357 }
358}
359void HexagonVectorLoopCarriedReuse::findValueToReuse() {
360 for (auto *D : Dependences) {
361 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
365 << ".. Skipping because number of iterations > than the limit\n");
366 continue;
367 }
368
371 int Iters = D->iterations();
373 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
374 << " can be reused\n");
375
376 SmallVector<Instruction *, 4> PNUsers;
377 for (Use &U : PN->uses()) {
379
380 if (User->getParent() != BB)
381 continue;
382 if (ReplacedInsts.count(User)) {
384 << " has already been replaced. Skipping...\n");
385 continue;
386 }
388 continue;
389 if (User->mayHaveSideEffects())
390 continue;
391 if (!canReplace(User))
392 continue;
393
395 }
396 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
397
398
399
400
401
402
403
404 for (Instruction *I : PNUsers) {
405 for (Use &U : BEInst->uses()) {
407
409 continue;
410 if (!isEquivalentOperation(I, BEUser))
411 continue;
412
413 int NumOperands = I->getNumOperands();
414
415
416
417
418
419
420
421
422
423
424 std::map<Instruction *, DepChain *> DepChains;
426 if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
427 bool Found = false;
428 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
429 Value *Op = I->getOperand(OpNo);
431 Found = false;
432 for (int T = 0; T < NumOperands; ++T) {
435 if (!OpInst && !BEOpInst) {
436 if (Op == BEOp) {
437 Found = true;
438 break;
439 }
440 }
441
442 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
443 continue;
444
445 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
446
447 if (D) {
448 Found = true;
449 DepChains[OpInst] = D;
450 break;
451 }
452 }
453 if (!Found) {
454 BEUser = nullptr;
455 break;
456 }
457 }
458 } else {
459
460 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
461 Value *Op = I->getOperand(OpNo);
463
465 if (!OpInst) {
466 if (Op == BEOp)
467 continue;
468
469
470 BEUser = nullptr;
471 break;
472 }
473
475 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
476
477 if (D) {
478 DepChains[OpInst] = D;
479 } else {
480 BEUser = nullptr;
481 break;
482 }
483 }
484 }
485 if (BEUser) {
487 ReuseCandidate.Inst2Replace = I;
488 ReuseCandidate.BackedgeInst = BEUser;
489 ReuseCandidate.DepChains = DepChains;
490 ReuseCandidate.Iterations = Iters;
491 return;
492 }
493 ReuseCandidate.reset();
494 }
495 }
496 }
497 ReuseCandidate.reset();
498}
499
500Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
501 BasicBlock *BB) {
505 return ValueInBlock;
506}
507
508void HexagonVectorLoopCarriedReuse::reuseValue() {
510 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
511 Instruction *BEInst = ReuseCandidate.BackedgeInst;
513 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
514 int Iterations = ReuseCandidate.Iterations;
516 assert(!DepChains.empty() && "No DepChains");
517 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
518
519 SmallVector<Instruction *, 4> InstsInPreheader;
520 for (int i = 0; i < Iterations; ++i) {
522 for (int j = 0; j < NumOperands; ++j) {
524 if ()
525 continue;
526
527 DepChain &D = *DepChains[I];
528
529
530
531 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
532 InstInPreheader->setOperand(j, ValInPreheader);
533 }
534 InstsInPreheader.push_back(InstInPreheader);
535 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
537 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
538 << LoopPH->getName() << "\n");
539 }
543 Value *BEVal = BEInst;
544 PHINode *NewPhi;
545 for (int i = Iterations-1; i >=0 ; --i) {
546 Instruction *InstInPreheader = InstsInPreheader[i];
547 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
548 NewPhi->addIncoming(InstInPreheader, LoopPH);
551 << "\n");
552 BEVal = NewPhi;
553 }
554
555
557 ReplacedInsts.insert(Inst2Replace);
558 ++HexagonNumVectorLoopCarriedReuse;
559}
560
561bool HexagonVectorLoopCarriedReuse::doVLCR() {
563 "Can do VLCR on the innermost loop only");
565 "Can do VLCR only on single block loops");
566
569
571 do {
572
573 Dependences.clear();
575
576 findLoopCarriedDeps();
577 findValueToReuse();
578 if (ReuseCandidate.isDefined()) {
579 reuseValue();
582 }
583 llvm::for_each(Dependences, std::default_delete());
586}
587
588void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
589 DepChain &D) {
591 if (!PN) {
593 return;
594 } else {
596 if (NumIncomingValues != 2) {
597 D.clear();
598 return;
599 }
600
602 if (BB != CurLoop->getHeader()) {
603 D.clear();
604 return;
605 }
606
609
610
611 assert(BEInst && "There should be a value over the backedge");
612
613 Value *PreHdrVal =
616 D.clear();
617 return;
618 }
619 D.push_back(PN);
620 findDepChainFromPHI(BEInst, D);
621 }
622}
623
624DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
625 Instruction *I2,
626 int Iters) {
627 for (auto *D : Dependences) {
628 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
629 return D;
630 }
631 return nullptr;
632}
633
634void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
639 continue;
640
641 DepChain *D = new DepChain();
642 findDepChainFromPHI(PN, *D);
643 if (D->size() != 0)
644 Dependences.insert(D);
645 else
646 delete D;
647 }
648 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
649 LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);
650}
651
653 return new HexagonVectorLoopCarriedReuseLegacyPass();
654}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))
This defines the Use class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
iterator begin()
Instruction iterator methods.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isVectorTy() const
True if this is an instance of VectorType.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI Instruction & back() const
LLVM_ABI Instruction & front() const
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI char & LoopSimplifyID
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
Pass * createHexagonVectorLoopCarriedReuseLegacyPass()
Definition HexagonVectorLoopCarriedReuse.cpp:652
PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Run pass over the Loop.
Definition HexagonVectorLoopCarriedReuse.cpp:213
HexagonVectorLoopCarriedReusePass()=default
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...