LLVM: lib/Target/AMDGPU/SILowerI1Copies.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
28
29#define DEBUG_TYPE "si-i1-copies"
30
31using namespace llvm;
32
36
37namespace {
38
40public:
43
44private:
46
47public:
61
62 bool lowerCopiesFromI1();
63 bool lowerCopiesToI1();
64 bool cleanConstrainRegs(bool Changed);
// Returns true iff Reg is a virtual register whose register class, as
// reported by MRI, is AMDGPU::VReg_1RegClass.
// The isVirtual() test comes first, so MRI->getRegClass() is only queried
// for virtual registers.
65 bool isVreg1(Register Reg) const {
66 return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
67 }
68};
69
70Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
74
// Constrains every register recorded in ConstrainRegs to
// AMDGPU::SReg_1_XEXECRegClass, then empties ConstrainRegs.
// Changed is passed straight through as the return value; this function
// does not modify it.
// NOTE(review): this listing jumps from original line 75 to 77, so one line
// of the real function body is not shown here — confirm against the source.
75bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
// The result of constrainRegClass() is not inspected.
77 for (Register Reg : ConstrainRegs)
78 MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
79 ConstrainRegs.clear();
80
81 return Changed;
82}
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106class PhiIncomingAnalysis {
109
110
111
116
117public:
120
121
122
124 return ReachableMap.find(&MBB)->second;
125 }
126
128
131 ReachableMap.clear();
132 ReachableOrdered.clear();
133 Predecessors.clear();
134
135
136
137 ReachableMap.try_emplace(&DefBlock, false);
138 ReachableOrdered.push_back(&DefBlock);
139
140 for (auto Incoming : Incomings) {
142 if (MBB == &DefBlock) {
143 ReachableMap[&DefBlock] = true;
144 continue;
145 }
146
149
150
151
154 }
155
156 while (.empty()) {
159 continue;
161
163 }
164
166 bool HaveReachablePred = false;
168 if (ReachableMap.count(Pred)) {
169 HaveReachablePred = true;
170 } else {
171 Stack.push_back(Pred);
172 }
173 }
174 if (!HaveReachablePred)
175 ReachableMap[MBB] = true;
176 if (HaveReachablePred) {
179 Predecessors.push_back(UnreachablePred);
180 }
181 }
183 }
184 }
185};
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219class LoopFinder {
222
223
224
225
227
228
229
231
232
234
235
236
237
238 unsigned FoundLoopLevel = ~0u;
239
243
244public:
246 : DT(DT), PDT(PDT) {}
247
250 CommonDominators.clear();
252 NextLevel.clear();
253 VisitedPostDom = nullptr;
254 FoundLoopLevel = ~0u;
255
256 DefBlock = &MBB;
257 }
258
259
260
261
262
265
266 if (!VisitedPostDom)
267 advanceLevel();
268
269 unsigned Level = 0;
270 while (PDNode->getBlock() != PostDom) {
271 if (PDNode->getBlock() == VisitedPostDom)
272 advanceLevel();
273 PDNode = PDNode->getIDom();
274 Level++;
275 if (FoundLoopLevel == Level)
276 return Level;
277 }
278
279 return 0;
280 }
281
282
283
284
289 assert(LoopLevel < CommonDominators.size());
290
292 for (auto &Incoming : Incomings)
294
295 if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
298 } else {
299
300
302 if (!inLoopLevel(*Pred, LoopLevel, Incomings))
305 }
306 }
307 }
308
309private:
312 auto DomIt = Visited.find(&MBB);
313 if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
314 return true;
315
316 for (auto &Incoming : Incomings)
318 return true;
319
320 return false;
321 }
322
323 void advanceLevel() {
325
326 if (!VisitedPostDom) {
327 VisitedPostDom = DefBlock;
328 VisitedDom = DefBlock;
329 Stack.push_back(DefBlock);
330 } else {
331 VisitedPostDom = PDT.getNode(VisitedPostDom)->getIDom()->getBlock();
332 VisitedDom = CommonDominators.back();
333
334 for (unsigned i = 0; i < NextLevel.size();) {
335 if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
336 Stack.push_back(NextLevel[i]);
337
338 NextLevel[i] = NextLevel.back();
340 } else {
341 i++;
342 }
343 }
344 }
345
346 unsigned Level = CommonDominators.size();
347 while (.empty()) {
351
352 Visited[MBB] = Level;
354
356 if (Succ == DefBlock) {
357 if (MBB == VisitedPostDom)
358 FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
359 else
360 FoundLoopLevel = std::min(FoundLoopLevel, Level);
361 continue;
362 }
363
364 if (Visited.try_emplace(Succ, ~0u).second) {
365 if (MBB == VisitedPostDom)
367 else
368 Stack.push_back(Succ);
369 }
370 }
371 }
372
373 CommonDominators.push_back(VisitedDom);
374 }
375};
376
377}
378
382 return MRI->createVirtualRegister(LaneMaskRegAttrs);
383}
384
393 UndefReg);
394 return UndefReg;
395}
396
397#ifndef NDEBUG
401 unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
402 return Size == 1 || Size == 32;
403}
404#endif
405
406bool Vreg1LoweringHelper::lowerCopiesFromI1() {
407 bool Changed = false;
409
412 if (MI.getOpcode() != AMDGPU::COPY)
413 continue;
414
415 Register DstReg = MI.getOperand(0).getReg();
416 Register SrcReg = MI.getOperand(1).getReg();
417 if (!isVreg1(SrcReg))
418 continue;
419
420 if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
421 continue;
422
423 Changed = true;
424
425
428
430 assert(.getOperand(0).getSubReg());
431
432 ConstrainRegs.insert(SrcReg);
433 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
440 }
441
443 MI->eraseFromParent();
444 DeadCopies.clear();
445 }
446 return Changed;
447}
448
452 : MF(MF), DT(DT), PDT(PDT) {
454
458
460 ExecReg = AMDGPU::EXEC_LO;
461 MovOp = AMDGPU::S_MOV_B32;
462 AndOp = AMDGPU::S_AND_B32;
463 OrOp = AMDGPU::S_OR_B32;
464 XorOp = AMDGPU::S_XOR_B32;
465 AndN2Op = AMDGPU::S_ANDN2_B32;
466 OrN2Op = AMDGPU::S_ORN2_B32;
467 } else {
469 MovOp = AMDGPU::S_MOV_B64;
470 AndOp = AMDGPU::S_AND_B64;
471 OrOp = AMDGPU::S_OR_B64;
472 XorOp = AMDGPU::S_XOR_B64;
473 AndN2Op = AMDGPU::S_ANDN2_B64;
474 OrN2Op = AMDGPU::S_ORN2_B64;
475 }
476}
477
481 PhiIncomingAnalysis PIA(*PDT, TII);
484
486 if (Vreg1Phis.empty())
487 return false;
488
493 if (&MBB != PrevMBB) {
494 LF.initialize(MBB);
495 PrevMBB = &MBB;
496 }
497
499
500 Register DstReg = MI->getOperand(0).getReg();
503
505
506
507
508
509
511 return DT->getNode(LHS.Block)->getDFSNumIn() <
513 });
514
515#ifndef NDEBUG
517#endif
518
519
520
521 std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
524
527
528
529
530
531
532 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
533
535
536 if (FoundLoopLevel) {
538 Incomings);
539
540 for (auto &Incoming : Incomings) {
543 }
544
545 for (auto &Incoming : Incomings) {
550 }
551 } else {
552
553
554 PIA.analyze(MBB, Incomings);
555
559
560 for (auto &Incoming : Incomings) {
562 if (PIA.isSource(IMBB)) {
565 } else {
568 }
569 }
570
571 for (auto &Incoming : Incomings) {
573 continue;
574
579 }
580 }
581
583 if (NewReg != DstReg) {
585 MI->eraseFromParent();
586 }
587
588 Incomings.clear();
589 }
590 return true;
591}
592
593bool Vreg1LoweringHelper::lowerCopiesToI1() {
594 bool Changed = false;
596 LoopFinder LF(*DT, *PDT);
598
600 LF.initialize(MBB);
601
603 if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
604 MI.getOpcode() != AMDGPU::COPY)
605 continue;
606
607 Register DstReg = MI.getOperand(0).getReg();
608 if (!isVreg1(DstReg))
609 continue;
610
611 Changed = true;
612
613 if (MRI->use_empty(DstReg)) {
615 continue;
616 }
617
619
620 markAsLaneMask(DstReg);
621 initializeLaneMaskRegisterAttributes(DstReg);
622
623 if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
624 continue;
625
627 Register SrcReg = MI.getOperand(1).getReg();
628 assert(.getOperand(1).getSubReg());
629
630 if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
631 assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
636 MI.getOperand(1).setReg(TmpReg);
637 SrcReg = TmpReg;
638 } else {
639
640 MI.getOperand(1).setIsKill(false);
641 }
642
643
644
645 std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
648
651 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
652 if (FoundLoopLevel) {
655 LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);
656
657 buildMergeLaneMasks(MBB, MI, DL, DstReg,
660 }
661 }
662
664 MI->eraseFromParent();
665 DeadCopies.clear();
666 }
667 return Changed;
668}
669
672 for (;;) {
674 if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
675 return true;
676
677 if (MI->getOpcode() != AMDGPU::COPY)
678 break;
679
680 Reg = MI->getOperand(1).getReg();
681 if (!Reg.isVirtual())
682 return false;
684 return false;
685 }
686
687 if (MI->getOpcode() != MovOp)
688 return false;
689
690 if (->getOperand(1).isImm())
691 return false;
692
693 int64_t Imm = MI->getOperand(1).getImm();
694 if (Imm == 0) {
695 Val = false;
696 return true;
697 }
698 if (Imm == -1) {
699 Val = true;
700 return true;
701 }
702
703 return false;
704}
705
707 Def = false;
708 Use = false;
709
711 if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
712 if (MO.isUse())
713 Use = true;
714 else
715 Def = true;
716 }
717 }
718}
719
720
721
725 bool TerminatorsUseSCC = false;
726 for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
727 bool DefsSCC;
729 if (TerminatorsUseSCC || DefsSCC)
730 break;
731 }
732
733 if (!TerminatorsUseSCC)
734 return InsertionPt;
735
736 while (InsertionPt != MBB.begin()) {
737 InsertionPt--;
738
739 bool DefSCC, UseSCC;
741 if (DefSCC)
742 return InsertionPt;
743 }
744
745
746 llvm_unreachable("SCC used by terminator but no def in block");
747}
748
749
// Sets DstReg's register class to the class returned by ST->getBoolRC().
// The previous register class of DstReg is overwritten unconditionally.
750void Vreg1LoweringHelper::markAsLaneMask(Register DstReg) const {
751 MRI->setRegClass(DstReg, ST->getBoolRC());
752}
753
754void Vreg1LoweringHelper::getCandidatesForLowering(
758 if (isVreg1(MI.getOperand(0).getReg()))
760 }
761 }
762}
763
764void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
766 for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
767 assert(i + 1 < MI->getNumOperands());
768 Register IncomingReg = MI->getOperand(i).getReg();
770 MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
771
772 if (IncomingDef->getOpcode() == AMDGPU::COPY) {
774 assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
776 } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
777 continue;
778 } else {
779 assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
780 }
781
783 }
784}
785
786void Vreg1LoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
788 MRI->replaceRegWith(NewReg, OldReg);
789}
790
796 bool PrevVal = false;
797 bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
798 bool CurVal = false;
799 bool CurConstant = isConstantLaneMask(CurReg, CurVal);
800
801 if (PrevConstant && CurConstant) {
802 if (PrevVal == CurVal) {
804 } else if (CurVal) {
806 } else {
810 }
811 return;
812 }
813
816 if (!PrevConstant) {
817 if (CurConstant && CurVal) {
818 PrevMaskedReg = PrevReg;
819 } else {
824 }
825 }
826 if (!CurConstant) {
827
828 if (PrevConstant && PrevVal) {
829 CurMaskedReg = CurReg;
830 } else {
835 }
836 }
837
838 if (PrevConstant && !PrevVal) {
840 .addReg(CurMaskedReg);
841 } else if (CurConstant && !CurVal) {
843 .addReg(PrevMaskedReg);
844 } else if (PrevConstant && PrevVal) {
846 .addReg(CurMaskedReg)
848 } else {
850 .addReg(PrevMaskedReg)
851 .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
852 }
853}
854
// Deliberate no-op: the body is empty and In is left untouched.
// Presumably this exists only to satisfy the pure-virtual
// PhiLoweringHelper::constrainAsLaneMask hook — confirm against the base class.
855void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}
856
857
858
859
860
861
862
863
864
865
868
871 return false;
872
873 Vreg1LoweringHelper Helper(&MF, &MDT, &MPDT);
874 bool Changed = false;
875 Changed |= Helper.lowerCopiesFromI1();
876 Changed |= Helper.lowerPhis();
877 Changed |= Helper.lowerCopiesToI1();
878 return Helper.cleanConstrainRegs(Changed);
879}
880
888 if (!Changed)
890
891
894 return PA;
895}
896
898public:
900
903 }
904
906
908
914 }
915};
916
919 getAnalysis().getDomTree();
921 getAnalysis().getPostDomTree();
923}
924
926 false, false)
931
933
935
938}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
static Register insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
static bool runFixI1Copies(MachineFunction &MF, MachineDominatorTree &MDT, MachinePostDominatorTree &MPDT)
Lower all instructions that def or use vreg_1 registers.
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, Register Reg)
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void updateDFSNumbers() const
updateDFSNumbers - Assign In and Out numbers to the nodes while walking dominator tree in dfs order.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
void push_back(MachineInstr *MI)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineBasicBlock * findNearestCommonDominator(ArrayRef< MachineBasicBlock * > Blocks) const
Returns the nearest common dominator of the given blocks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT, MachinePostDominatorTree *PDT)
bool isLaneMaskReg(Register Reg) const
MachineRegisterInfo * MRI
MachineDominatorTree * DT
DenseSet< Register > PhiRegisters
virtual void getCandidatesForLowering(SmallVectorImpl< MachineInstr * > &Vreg1Phis) const =0
virtual void constrainAsLaneMask(Incoming &In)=0
virtual void collectIncomingValuesFromPhi(const MachineInstr *MI, SmallVectorImpl< Incoming > &Incomings) const =0
virtual void markAsLaneMask(Register DstReg) const =0
MachinePostDominatorTree * PDT
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs
MachineBasicBlock::iterator getSaluInsertionAtEnd(MachineBasicBlock &MBB) const
Return a point at the end of the given MBB to insert SALU instructions for lane mask calculation.
void initializeLaneMaskRegisterAttributes(Register LaneMask)
bool isConstantLaneMask(Register Reg, bool &Val) const
virtual void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg)=0
virtual void replaceDstReg(Register NewReg, Register OldReg, MachineBasicBlock *MBB)=0
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Helper class for SSA formation on a set of values defined in multiple blocks.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
void initializeSILowerI1CopiesLegacyPass(PassRegistry &)
FunctionPass * createSILowerI1CopiesLegacyPass()
char & SILowerI1CopiesLegacyID
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
MachineBasicBlock * Block
All attributes(register class or bank and low-level type) a virtual register can have.