LLVM: lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
29#include "llvm/IR/IntrinsicsHexagon.h"
41#include
42#include
43#include
44#include
45
46using namespace llvm;
47
48#define DEBUG_TYPE "hexagon-vlcr"
49
50STATISTIC(HexagonNumVectorLoopCarriedReuse,
51 "Number of values that were reused from a previous iteration.");
52
54 "hexagon-vlcr-iteration-lim", cl::Hidden,
55 cl::desc("Maximum distance of loop carried dependences that are handled"),
57
58namespace llvm {
59
62
63}
64
65namespace {
66
67
69
70 class DepChain {
71 ChainOfDependences Chain;
72
73 public:
74 bool isIdentical(DepChain &Other) const {
76 return false;
77 ChainOfDependences &OtherChain = Other.getChain();
78 for (int i = 0; i < size(); ++i) {
79 if (Chain[i] != OtherChain[i])
80 return false;
81 }
82 return true;
83 }
84
85 ChainOfDependences &getChain() {
86 return Chain;
87 }
88
89 int size() const {
90 return Chain.size();
91 }
92
93 void clear() {
94 Chain.clear();
95 }
96
98 Chain.push_back(I);
99 }
100
101 int iterations() const {
102 return size() - 1;
103 }
104
106 return Chain.front();
107 }
108
110 return Chain.back();
111 }
112
113 Instruction *&operator[](const int index) {
114 return Chain[index];
115 }
116
118 };
119
122 const ChainOfDependences &CD = D.Chain;
123 int ChainSize = CD.size();
124 OS << "**DepChain Start::**\n";
125 for (int i = 0; i < ChainSize -1; ++i) {
126 OS << *(CD[i]) << " -->\n";
127 }
128 OS << *CD[ChainSize-1] << "\n";
129 return OS;
130 }
131
132 struct ReuseValue {
134
135
136
137
139 std::map<Instruction *, DepChain *> DepChains;
140 int Iterations = -1;
141
142 ReuseValue() = default;
143
144 void reset() {
145 Inst2Replace = nullptr;
146 BackedgeInst = nullptr;
147 DepChains.clear();
148 Iterations = -1;
149 }
150 bool isDefined() { return Inst2Replace != nullptr; }
151 };
152
155 OS << "** ReuseValue ***\n";
156 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
157 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
158 return OS;
159 }
160
161 class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
162 public:
163 static char ID;
164
165 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {
168 }
169
171 return "Hexagon-specific loop carried reuse for HVX vectors";
172 }
173
179 }
180
182 };
183
184 class HexagonVectorLoopCarriedReuse {
185 public:
186 HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};
187
188 bool run();
189
190 private:
192 std::set<Instruction *> ReplacedInsts;
193 Loop *CurLoop;
194 ReuseValue ReuseCandidate;
195
196 bool doVLCR();
197 void findLoopCarriedDeps();
198 void findValueToReuse();
199 void findDepChainFromPHI(Instruction *I, DepChain &D);
200 void reuseValue();
205 bool isCallInstCommutative(CallInst *C);
206 };
207
208}
209
210char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
211
213 "Hexagon-specific predictive commoning for HVX vectors",
214 false, false)
218 "Hexagon-specific predictive commoning for HVX vectors",
220
225 HexagonVectorLoopCarriedReuse Vlcr(&L);
226 if (!Vlcr.run())
230 return PA;
231}
232
233bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
235 if (skipLoop(L))
236 return false;
237 HexagonVectorLoopCarriedReuse Vlcr(L);
238 return Vlcr.run();
239}
240
241bool HexagonVectorLoopCarriedReuse::run() {
242 if (!CurLoop->getLoopPreheader())
243 return false;
244
245
246 if (!CurLoop->getSubLoops().empty())
247 return false;
248
249
250 if (CurLoop->getNumBlocks() != 1)
251 return false;
252
253 return doVLCR();
254}
255
256bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
257 switch (C->getCalledFunction()->getIntrinsicID()) {
258 case Intrinsic::hexagon_V6_vaddb:
259 case Intrinsic::hexagon_V6_vaddb_128B:
260 case Intrinsic::hexagon_V6_vaddh:
261 case Intrinsic::hexagon_V6_vaddh_128B:
262 case Intrinsic::hexagon_V6_vaddw:
263 case Intrinsic::hexagon_V6_vaddw_128B:
264 case Intrinsic::hexagon_V6_vaddubh:
265 case Intrinsic::hexagon_V6_vaddubh_128B:
266 case Intrinsic::hexagon_V6_vadduhw:
267 case Intrinsic::hexagon_V6_vadduhw_128B:
268 case Intrinsic::hexagon_V6_vaddhw:
269 case Intrinsic::hexagon_V6_vaddhw_128B:
270 case Intrinsic::hexagon_V6_vmaxb:
271 case Intrinsic::hexagon_V6_vmaxb_128B:
272 case Intrinsic::hexagon_V6_vmaxh:
273 case Intrinsic::hexagon_V6_vmaxh_128B:
274 case Intrinsic::hexagon_V6_vmaxw:
275 case Intrinsic::hexagon_V6_vmaxw_128B:
276 case Intrinsic::hexagon_V6_vmaxub:
277 case Intrinsic::hexagon_V6_vmaxub_128B:
278 case Intrinsic::hexagon_V6_vmaxuh:
279 case Intrinsic::hexagon_V6_vmaxuh_128B:
280 case Intrinsic::hexagon_V6_vminub:
281 case Intrinsic::hexagon_V6_vminub_128B:
282 case Intrinsic::hexagon_V6_vminuh:
283 case Intrinsic::hexagon_V6_vminuh_128B:
284 case Intrinsic::hexagon_V6_vminb:
285 case Intrinsic::hexagon_V6_vminb_128B:
286 case Intrinsic::hexagon_V6_vminh:
287 case Intrinsic::hexagon_V6_vminh_128B:
288 case Intrinsic::hexagon_V6_vminw:
289 case Intrinsic::hexagon_V6_vminw_128B:
290 case Intrinsic::hexagon_V6_vmpyub:
291 case Intrinsic::hexagon_V6_vmpyub_128B:
292 case Intrinsic::hexagon_V6_vmpyuh:
293 case Intrinsic::hexagon_V6_vmpyuh_128B:
294 case Intrinsic::hexagon_V6_vavgub:
295 case Intrinsic::hexagon_V6_vavgub_128B:
296 case Intrinsic::hexagon_V6_vavgh:
297 case Intrinsic::hexagon_V6_vavgh_128B:
298 case Intrinsic::hexagon_V6_vavguh:
299 case Intrinsic::hexagon_V6_vavguh_128B:
300 case Intrinsic::hexagon_V6_vavgw:
301 case Intrinsic::hexagon_V6_vavgw_128B:
302 case Intrinsic::hexagon_V6_vavgb:
303 case Intrinsic::hexagon_V6_vavgb_128B:
304 case Intrinsic::hexagon_V6_vavguw:
305 case Intrinsic::hexagon_V6_vavguw_128B:
306 case Intrinsic::hexagon_V6_vabsdiffh:
307 case Intrinsic::hexagon_V6_vabsdiffh_128B:
308 case Intrinsic::hexagon_V6_vabsdiffub:
309 case Intrinsic::hexagon_V6_vabsdiffub_128B:
310 case Intrinsic::hexagon_V6_vabsdiffuh:
311 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
312 case Intrinsic::hexagon_V6_vabsdiffw:
313 case Intrinsic::hexagon_V6_vabsdiffw_128B:
314 return true;
315 default:
316 return false;
317 }
318}
319
320bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
322 if (->isSameOperationAs(I2))
323 return false;
324
325
326
327
328 if (CallInst *C1 = dyn_cast(I1)) {
329 if (CallInst *C2 = dyn_cast(I2)) {
330 if (C1->getCalledFunction() != C2->getCalledFunction())
331 return false;
332 }
333 }
334
335
336
338 unsigned NumOperands = I1->getNumOperands();
339 for (unsigned i = 0; i < NumOperands; ++i) {
340 ConstantInt *C1 = dyn_cast(I1->getOperand(i));
342 if(!C1) continue;
345 return false;
346 }
347 }
348
349 return true;
350}
351
352bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
354 if ()
355 return true;
356
357 switch (II->getIntrinsicID()) {
358 case Intrinsic::hexagon_V6_hi:
359 case Intrinsic::hexagon_V6_lo:
360 case Intrinsic::hexagon_V6_hi_128B:
361 case Intrinsic::hexagon_V6_lo_128B:
362 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
363 return false;
364 default:
365 return true;
366 }
367}
368void HexagonVectorLoopCarriedReuse::findValueToReuse() {
369 for (auto *D : Dependences) {
370 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
374 << ".. Skipping because number of iterations > than the limit\n");
375 continue;
376 }
377
378 PHINode *PN = cast(D->front());
380 int Iters = D->iterations();
382 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
383 << " can be reused\n");
384
386 for (Use &U : PN->uses()) {
388
389 if (User->getParent() != BB)
390 continue;
391 if (ReplacedInsts.count(User)) {
393 << " has already been replaced. Skipping...\n");
394 continue;
395 }
396 if (isa(User))
397 continue;
398 if (User->mayHaveSideEffects())
399 continue;
400 if (!canReplace(User))
401 continue;
402
404 }
405 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
406
407
408
409
410
411
412
414 for (Use &U : BEInst->uses()) {
415 Instruction *BEUser = cast(U.getUser());
416
418 continue;
419 if (!isEquivalentOperation(I, BEUser))
420 continue;
421
422 int NumOperands = I->getNumOperands();
423
424
425
426
427
428
429
430
431
432
433 std::map<Instruction *, DepChain *> DepChains;
434 CallInst *C1 = dyn_cast(I);
435 if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
436 bool Found = false;
437 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
438 Value *Op = I->getOperand(OpNo);
440 Found = false;
441 for (int T = 0; T < NumOperands; ++T) {
443 Instruction *BEOpInst = dyn_cast(BEOp);
444 if (!OpInst && !BEOpInst) {
445 if (Op == BEOp) {
446 Found = true;
447 break;
448 }
449 }
450
451 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
452 continue;
453
454 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
455
456 if (D) {
457 Found = true;
458 DepChains[OpInst] = D;
459 break;
460 }
461 }
462 if (!Found) {
463 BEUser = nullptr;
464 break;
465 }
466 }
467 } else {
468
469 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
470 Value *Op = I->getOperand(OpNo);
472
474 if (!OpInst) {
475 if (Op == BEOp)
476 continue;
477
478
479 BEUser = nullptr;
480 break;
481 }
482
483 Instruction *BEOpInst = dyn_cast(BEOp);
484 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
485
486 if (D) {
487 DepChains[OpInst] = D;
488 } else {
489 BEUser = nullptr;
490 break;
491 }
492 }
493 }
494 if (BEUser) {
496 ReuseCandidate.Inst2Replace = I;
497 ReuseCandidate.BackedgeInst = BEUser;
498 ReuseCandidate.DepChains = DepChains;
499 ReuseCandidate.Iterations = Iters;
500 return;
501 }
502 ReuseCandidate.reset();
503 }
504 }
505 }
506 ReuseCandidate.reset();
507}
508
509Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
511 PHINode *PN = dyn_cast(Op);
514 return ValueInBlock;
515}
516
517void HexagonVectorLoopCarriedReuse::reuseValue() {
519 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
520 Instruction *BEInst = ReuseCandidate.BackedgeInst;
522 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
523 int Iterations = ReuseCandidate.Iterations;
524 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
525 assert(!DepChains.empty() && "No DepChains");
526 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
527
529 for (int i = 0; i < Iterations; ++i) {
532 for (int j = 0; j < NumOperands; ++j) {
534 if ()
535 continue;
536
537 DepChain &D = *DepChains[I];
538
539
540
541 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
542 InstInPreheader->setOperand(j, ValInPreheader);
543 }
544 InstsInPreheader.push_back(InstInPreheader);
545 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
547 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
548 << LoopPH->getName() << "\n");
549 }
553 Value *BEVal = BEInst;
555 for (int i = Iterations-1; i >=0 ; --i) {
556 Instruction *InstInPreheader = InstsInPreheader[i];
557 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
558 NewPhi->addIncoming(InstInPreheader, LoopPH);
561 << "\n");
562 BEVal = NewPhi;
563 }
564
565
567 ReplacedInsts.insert(Inst2Replace);
568 ++HexagonNumVectorLoopCarriedReuse;
569}
570
571bool HexagonVectorLoopCarriedReuse::doVLCR() {
572 assert(CurLoop->getSubLoops().empty() &&
573 "Can do VLCR on the innermost loop only");
574 assert((CurLoop->getNumBlocks() == 1) &&
575 "Can do VLCR only on single block loops");
576
577 bool Changed = false;
579
580 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
581 do {
582
583 Dependences.clear();
585
586 findLoopCarriedDeps();
587 findValueToReuse();
588 if (ReuseCandidate.isDefined()) {
589 reuseValue();
590 Changed = true;
592 }
593 llvm::for_each(Dependences, std::default_delete());
595 return Changed;
596}
597
598void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
599 DepChain &D) {
600 PHINode *PN = dyn_cast(I);
601 if (!PN) {
603 return;
604 } else {
606 if (NumIncomingValues != 2) {
607 D.clear();
608 return;
609 }
610
612 if (BB != CurLoop->getHeader()) {
613 D.clear();
614 return;
615 }
616
618 Instruction *BEInst = dyn_cast(BEVal);
619
620
621 assert(BEInst && "There should be a value over the backedge");
622
623 Value *PreHdrVal =
625 if(!PreHdrVal || !isa(PreHdrVal)) {
626 D.clear();
627 return;
628 }
629 D.push_back(PN);
630 findDepChainFromPHI(BEInst, D);
631 }
632}
633
634DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
636 int Iters) {
637 for (auto *D : Dependences) {
638 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
639 return D;
640 }
641 return nullptr;
642}
643
644void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
645 BasicBlock *BB = CurLoop->getHeader();
646 for (auto I = BB->begin(), E = BB->end(); I != E && isa(I); ++I) {
647 auto *PN = cast(I);
648 if (!isa(PN->getType()))
649 continue;
650
651 DepChain *D = new DepChain();
652 findDepChainFromPHI(PN, *D);
653 if (D->size() != 0)
654 Dependences.insert(D);
655 else
656 delete D;
657 }
658 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
659 LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);
660}
661
663 return new HexagonVectorLoopCarriedReuseLegacyPass();
664}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_UNUSED
std::optional< std::vector< StOtherPiece > > Other
static cl::opt< int > HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2))
hexagon Hexagon specific predictive commoning for HVX vectors
This defines the Use class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
This class represents an Operation in the Expression.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
A vector that has set insertion semantics.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool isVectorTy() const
True if this is an instance of VectorType.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Pass * createHexagonVectorLoopCarriedReuseLegacyPass()
Hexagon Vector Loop Carried Reuse Pass.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...