LLVM: lib/CodeGen/MachineUniformityAnalysis.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
17
18using namespace llvm;
19
// Reports whether instruction I defines at least one register that is already
// known to be divergent: returns true at the first def operand whose register
// satisfies isDivergent(), and false when none does.
// NOTE(review): the declarator (original lines 21-22) is missing from this
// listing; presumably this is the Machine IR specialization of
// hasDivergentDefs(const InstructionT &I) -- confirm against upstream.
20template <>
23 for (auto &op : I.all_defs()) {
24 if (isDivergent(op.getReg()))
25 return true;
26 }
27 return false;
28}
29
// Walks the def operands of Instr and calls markDivergent() on each register
// that is not filtered out below. Returns true if any markDivergent() call
// returned true, false otherwise.
// NOTE(review): the declarator (original lines 31-32) and original line 40 are
// missing from this listing; presumably this is the Machine IR specialization
// of markDefsDivergent(const InstructionT &Instr) -- confirm against upstream.
30template <>
33 bool insertedDivergent = false;
34 const auto &MRI = F.getRegInfo();
35 const auto &RBI = *F.getSubtarget().getRegBankInfo();
36 const auto &TRI = *MRI.getTargetRegisterInfo();
37 for (auto &op : Instr.all_defs()) {
// NOTE(review): the expression in front of `.getReg()` was lost in extraction;
// given the `continue`, this presumably skips defs whose register is not
// virtual (`!op.getReg().isVirtual()`) -- confirm against upstream.
38 if (.getReg().isVirtual())
39 continue;
// Registers the target reports as uniform are left unmarked.
41 if (TRI.isUniformReg(MRI, RBI, op.getReg()))
42 continue;
// Accumulate with |= so one successful insertion is remembered across defs.
43 insertedDivergent |= markDivergent(op.getReg());
44 }
45 return insertedDivergent;
46}
47
// Seeds the analysis from the target's per-instruction uniformity query:
// an instruction reported as AlwaysUniform is registered through
// addUniformOverride(); one reported as NeverUniform is passed to
// markDivergent(). Any other answer leaves the instruction untouched here.
// NOTE(review): the declarator (original line 49) and the two loop headers
// (original lines 52-53) that introduce `instr` are missing from this listing;
// the two closing braces before the final one presumably close loops over the
// blocks of F and the instructions of each block -- confirm against upstream.
48template <>
50 const auto &InstrInfo = *F.getSubtarget().getInstrInfo();
51
54 auto uniformity = InstrInfo.getInstructionUniformity(instr);
55 if (uniformity == InstructionUniformity::AlwaysUniform) {
56 addUniformOverride(instr);
// Nothing more to do for an overridden instruction; go to the next one.
57 continue;
58 }
59
60 if (uniformity == InstructionUniformity::NeverUniform) {
61 markDivergent(instr);
62 }
63 }
64 }
65}
66
// Given a register that is already divergent (asserted), calls markDivergent()
// on every instruction that the register info lists as a user of Reg.
// NOTE(review): the declarator (original lines 68-69) is missing from this
// listing; presumably this is the pushUsers overload taking a register --
// confirm against upstream.
67template <>
70 assert(isDivergent(Reg));
71 const auto &RegInfo = F.getRegInfo();
72 for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
73 markDivergent(UserInstr);
74 }
75}
76
// Instruction-level counterpart of the register pushUsers: asserts that Instr
// is not always-uniform, does nothing for terminators, and otherwise forwards
// each divergent register `Reg` to pushUsers(Reg).
// NOTE(review): the declarator (original lines 78-79) and the lines that
// introduce `Reg` (original lines 83-84, presumably a loop over the defs of
// Instr) are missing from this listing -- confirm against upstream.
77template <>
80 assert(!isAlwaysUniform(Instr));
// Terminators are returned from early; their users are not pushed here.
81 if (Instr.isTerminator())
82 return;
85 if (isDivergent(Reg))
86 pushUsers(Reg);
87 }
88}
89
// Scans the operands of I (asserted not always-uniform) and returns true as
// soon as one qualifies; returns false when the loop finishes without a hit.
// A physical register operand qualifies unconditionally.
// NOTE(review): this listing is missing the declarator (original lines 91-92),
// the line that introduces `Reg` (97) and the condition guarding the second
// `return true` (105). Presumably this is usesValueFromCycle and the lost
// condition tests whether Def's parent block lies inside the cycle -- confirm
// against upstream before relying on this description.
90template <>
93 assert(!isAlwaysUniform(I));
94 for (auto &Op : I.operands()) {
// NOTE(review): the operand expressions in front of `.isReg()` and
// `.readsReg()` were lost in extraction; presumably `!Op.isReg() ||
// !Op.readsReg()`, i.e. skip operands that are not register reads -- confirm.
95 if (.isReg() ||
.readsReg())
96 continue;
98
99
100
101 if (Reg.isPhysical())
102 return true;
103
104 auto *Def = F.getRegInfo().getVRegDef(Reg);
106 return true;
107 }
108 return false;
109}
110
// For the def operands of I that pass the filters below, calls markDivergent()
// on each user instruction whose parent block is not contained in DefCycle.
// Defs whose register is already divergent are skipped.
// NOTE(review): this listing is missing the declarator (original lines
// 112-114), the line that introduces `Reg` (119) and the loop header that
// introduces `UserInstr` (122, presumably iterating
// RegInfo.use_instructions(Reg)). Presumably this is
// propagateTemporalDivergence -- confirm against upstream.
111template <>
115 const auto &RegInfo = F.getRegInfo();
116 for (auto &Op : I.all_defs()) {
// NOTE(review): the expression in front of `.getReg()` was lost in extraction;
// presumably `!Op.getReg().isVirtual()`, i.e. skip non-virtual defs -- confirm.
117 if (.getReg().isVirtual())
118 continue;
120 if (isDivergent(Reg))
121 continue;
// Users whose block is inside DefCycle are left alone.
123 if (DefCycle.contains(UserInstr.getParent()))
124 continue;
125 markDivergent(UserInstr);
126 }
127 }
128}
129
// Decides whether the use U observes a divergent value.
//   - Non-register operands: false.
//   - Register already divergent: true.
//   - Register for which getOneDef() returns null: true.
//   - Otherwise: the result of isTemporalDivergent() for the block containing
//     the using instruction and the defining instruction.
// NOTE(review): the declarator (original lines 131-132) is missing from this
// listing; presumably this is the Machine IR specialization of
// isDivergentUse(const UseT &U) -- confirm against upstream.
130template <>
133 if (!U.isReg())
134 return false;
135
136 auto Reg = U.getReg();
137 if (isDivergent(Reg))
138 return true;
139
140 const auto &RegInfo = F.getRegInfo();
141 auto *Def = RegInfo.getOneDef(Reg);
// With no single def to reason about, the use is treated as divergent.
142 if (!Def)
143 return true;
144
145 auto *DefInstr = Def->getParent();
146 auto *UseInstr = U.getParent();
147 return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
148}
149
150
151
155
159 assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!");
161 if (HasBranchDivergence)
163 return UI;
164}
165
166namespace {
167
169public:
170 static char ID;
171
172 MachineUniformityInfoPrinterPass();
173
176};
177
178}
179
181
185}
186
188 "Machine Uniformity Info Analysis", true, true)
193
195 AU.setPreservesAll();
199}
200
202 auto &DomTree = getAnalysis().getDomTree();
203 auto &CI = getAnalysis().getCycleInfo();
204
205
207 return false;
208}
209
211 const Module *) const {
212 OS << "MachineUniformityInfo for function: " << UI.getFunction().getName()
213 << "\n";
215}
216
217char MachineUniformityInfoPrinterPass::ID = 0;
218
219MachineUniformityInfoPrinterPass::MachineUniformityInfoPrinterPass()
223}
224
226 "print-machine-uniformity",
227 "Print Machine Uniformity Info Analysis", true, true)
232
// Declares this pass's analysis usage; the visible part marks all analyses as
// preserved via AU.setPreservesAll().
// NOTE(review): the parameter line (original 234) and two body lines (236-237)
// are missing from this listing, so any required analyses registered there are
// not shown -- confirm against upstream.
233void MachineUniformityInfoPrinterPass::getAnalysisUsage(
235 AU.setPreservesAll();
238}
239
// Fetches an analysis result into UI, prints it to errs(), and returns false.
// NOTE(review): the parameter line (original 241) is missing from this listing,
// and the template argument of getAnalysis appears to have been stripped in
// extraction -- confirm both against upstream.
240bool MachineUniformityInfoPrinterPass::runOnMachineFunction(
242 auto &UI = getAnalysis();
243 UI.print(errs());
244 return false;
245}
unsigned const MachineRegisterInfo * MRI
block Block Frequency Analysis
COFF::MachineTypes Machine
Implementation of uniformity analysis.
This file declares a specialization of the GenericSSAContext template class for Machine IR.
unsigned const TargetRegisterInfo * TRI
Machine IR instance of the generic uniformity analysis.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unify loop Fixup each natural loop to have a single exit block
Represent the analysis usage information of a pass.
This class represents an Operation in the Expression.
A possibly irreducible generalization of a Loop.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
Analysis that identifies uniform values in a data-parallel execution.
bool isDivergentUse(const UseT &U) const
bool hasDivergentDefs(const InstructionT &I) const
bool markDefsDivergent(const InstructionT &Instr)
Mark outputs of Instr as divergent.
void print(raw_ostream &Out) const
Helper function for printing.
const FunctionT & getFunction() const
The GPU kernel this analysis result is for.
Legacy analysis pass which computes a MachineCycleInfo.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
Legacy analysis pass which computes a MachineUniformityInfo.
MachineUniformityAnalysisPass()
void print(raw_ostream &OS, const Module *M=nullptr) const override
print - Print out the internal state of the pass.
bool runOnMachineFunction(MachineFunction &F) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
This class implements an extremely fast bulk output stream that can only output to a stream.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< InstrNode * > Instr
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void initializeMachineUniformityAnalysisPassPass(PassRegistry &)
MachineUniformityInfo computeMachineUniformityInfo(MachineFunction &F, const MachineCycleInfo &cycleInfo, const MachineDominatorTree &domTree, bool HasBranchDivergence)
Compute uniformity information for a Machine IR function.
void initializeMachineUniformityInfoPrinterPassPass(PassRegistry &)