LLVM: lib/Target/AMDGPU/GCNNSAReassign.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
27
28using namespace llvm;
29
30#define DEBUG_TYPE "amdgpu-nsa-reassign"
31
33 "Number of NSA instructions with non-sequential address found");
35 "Number of NSA instructions changed to sequential");
36
37namespace {
// NOTE(review): This file is an HTML/doxygen scrape of LLVM's
// lib/Target/AMDGPU/GCNNSAReassign.cpp. The integer fused to the front of
// each line is the rendered source line number, and several source lines are
// missing from the capture (gaps in that numbering). Comments below describe
// only what the visible text shows.
//
// Implementation class for the pass that tries to re-assign the physical
// VGPRs of NSA (non-sequential address) MIMG instructions so their address
// operands become contiguous (see CheckNSA()/scavengeRegs()/run() below).
38class GCNNSAReassignImpl {
39public:
// NOTE(review): the constructor's declaration line (source line 40) is
// missing from the scrape; only the member-init list survives. Presumably it
// takes VirtRegMap/LiveRegMatrix/LiveIntervals pointers (VM/LM/LS) — confirm
// against upstream.
41 : VRM(VM), LRM(LM), LIS(LS) {}
42
// Entry point: process one MachineFunction.
43 bool run(MachineFunction &MF);
44
45private:
// Classification produced by CheckNSA() for a candidate instruction.
46 enum NSA_Status {
// Not an NSA-encoded MIMG instruction.
47 NOT_NSA,
// NSA instruction whose address registers cannot safely be moved.
48 FIXED,
// NSA instruction whose assigned VGPRs do not form a sequence.
49 NON_CONTIGUOUS,
50
// NSA instruction whose assigned VGPRs are already sequential.
51 CONTIGUOUS
52 };
53
54 const GCNSubtarget *ST;
55
56 const MachineRegisterInfo *MRI;
57
58 const SIRegisterInfo *TRI;
59
60 VirtRegMap *VRM;
61
62 LiveRegMatrix *LRM;
63
64 LiveIntervals *LIS;
65
// Upper bound on VGPRs usable by this function (computed in run()).
66 unsigned MaxNumVGPRs;
67
// NOTE(review): source line 68 is missing here; run() later does
// "CSRegs = MRI->getCalleeSavedRegs();", so the declaration of the CSRegs
// member was probably on the missing line — confirm upstream.
69
// Classify MI; Fast selects a cheaper re-check (used from run()).
70 NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
71
// Try to place Intervals[N] at physical register StartReg + N for all N.
72 bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
73 unsigned StartReg) const;
74
// Whether [StartReg, StartReg + NumRegs) is allocatable and CSR-safe.
75 bool canAssign(unsigned StartReg, unsigned NumRegs) const;
76
// Search the VGPR file for a free contiguous run fitting all Intervals.
77 bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
78};
79
// NOTE(review): the class header line (source line 80, presumably
// "class GCNNSAReassignLegacy : public MachineFunctionPass {") is missing
// from this scrape; only the class body survives below.
81public:
// Pass identification, replacement for typeid (standard legacy-PM idiom).
82 static char ID;
83
84 GCNNSAReassignLegacy() : MachineFunctionPass(ID) {
// NOTE(review): source line 85 (likely the initializeGCNNSAReassignLegacyPass
// registration call declared at the bottom of this capture) is missing.
86 }
87
88 bool runOnMachineFunction(MachineFunction &MF) override;
89
90 StringRef getPassName() const override { return "GCN NSA Reassign"; };
91
92 void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the template arguments of these addRequired<...>() calls
// were eaten by the HTML rendering. runOnMachineFunction() below fetches
// VirtRegMap, LiveRegMatrix and LiveIntervals, which matches three
// addRequired calls — confirm upstream.
93 AU.addRequired();
94 AU.addRequired();
95 AU.addRequired();
// NOTE(review): source lines 96-97 (likely AU.setPreservesAll() and the
// delegation to MachineFunctionPass::getAnalysisUsage) are missing.
98 }
99};
100
101}
102
// NOTE(review): only fragments of the INITIALIZE_PASS_BEGIN /
// INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END macro invocations
// (source lines 103-110) survived the scrape; this is the tail of the
// registration of GCNNSAReassignLegacy with the PassRegistry.
104 false, false)
110
// Definition of the legacy pass's ID member declared in the class above.
111char GCNNSAReassignLegacy::ID = 0;
112
// NOTE(review): source line 113 is missing; per the doxygen index appended
// to this capture, it defines "char &llvm::GCNNSAReassignID".
114
// Attempt to assign each interval Intervals[N] to physical register
// StartReg + N. Intervals that currently have a physical mapping are first
// unassigned, then every target slot is interference-checked before any
// assignment is made; returns false on the first conflict.
115bool GCNNSAReassignImpl::tryAssignRegisters(
// NOTE(review): source line 116 — the parameter list carrying Intervals and
// StartReg — is missing from this scrape; see the in-class declaration.
117 unsigned NumRegs = Intervals.size();
118
// Drop current physical assignments so the interference checks below only
// see *other* users of the candidate registers.
119 for (unsigned N = 0; N < NumRegs; ++N)
120 if (VRM->hasPhys(Intervals[N]->reg()))
121 LRM->unassign(*Intervals[N]);
122
// All-or-nothing: bail out before assigning anything if any slot conflicts.
123 for (unsigned N = 0; N < NumRegs; ++N)
124 if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
125 return false;
126
127 for (unsigned N = 0; N < NumRegs; ++N)
// NOTE(review): source line 128 — presumably the LRM->assign(...) of each
// interval to MCRegister StartReg + N — is missing from the capture;
// confirm upstream before relying on this function.
129
130 return true;
131}
132
// Check that every physical register in [StartReg, StartReg + NumRegs) is
// allocatable and does not overlap an in-use callee-saved register. CSRegs
// is treated as a null-terminated list (run() sets it from
// MRI->getCalleeSavedRegs()).
133bool GCNNSAReassignImpl::canAssign(unsigned StartReg, unsigned NumRegs) const {
134 for (unsigned N = 0; N < NumRegs; ++N) {
135 unsigned Reg = StartReg + N;
// NOTE(review): the object of this call was lost in the HTML rendering —
// presumably "!MRI->isAllocatable(Reg)"; confirm against upstream.
136 if (->isAllocatable(Reg))
137 return false;
138
// Walk the null-terminated callee-saved register list.
139 for (unsigned I = 0; CSRegs[I]; ++I)
140 if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
// NOTE(review): source line 141 — the second operand of the && (per the
// doxygen index, likely LRM->isPhysRegUsed(CSRegs[I])) — is missing.
142 return false;
143 }
144
145 return true;
146}
147
148bool GCNNSAReassignImpl::scavengeRegs(
149 SmallVectorImpl<LiveInterval *> &Intervals) const {
150 unsigned NumRegs = Intervals.size();
151
152 if (NumRegs > MaxNumVGPRs)
153 return false;
154 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
155
156 for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
157 if (!canAssign(Reg, NumRegs))
158 continue;
159
160 if (tryAssignRegisters(Intervals, Reg))
161 return true;
162 }
163
164 return false;
165}
166
// Classify MI: NOT_NSA for non-NSA instructions, FIXED when its address
// registers cannot safely be re-assigned, otherwise CONTIGUOUS or
// NON_CONTIGUOUS depending on whether the currently assigned physical VGPRs
// already form a sequence.
// NOTE(review): many source lines of this function (the gaps in the fused
// line numbers below) were dropped by the scrape, including several guard
// conditions — every "return FIXED" with a missing preceding line must be
// confirmed against upstream.
167GCNNSAReassignImpl::NSA_Status
168GCNNSAReassignImpl::CheckNSA(const MachineInstr &MI, bool Fast) const {
// NOTE(review): source lines 169-170 — presumably the AMDGPU::getMIMGInfo
// lookup producing Info and its null check guarding this return — are
// missing; Info is used below without a visible definition.
171 return NSA_Status::NOT_NSA;
172
// Only the gfx10/gfx11 NSA MIMG encodings are candidates.
173 switch (Info->MIMGEncoding) {
174 case AMDGPU::MIMGEncGfx10NSA:
175 case AMDGPU::MIMGEncGfx11NSA:
176 break;
177 default:
178 return NSA_Status::NOT_NSA;
179 }
180
// Locate the first address operand; subsequent vaddr operands follow it.
181 int VAddr0Idx =
182 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
183
184 unsigned VgprBase = 0;
185 bool NSA = false;
186 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
187 const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
// NOTE(review): source lines 188-189 — presumably extracting Op's register
// into Reg and the virtual-register check guarding this return — are
// missing.
190 return NSA_Status::FIXED;
191
// NOTE(review): source lines 192 and 194 are missing — presumably the
// definition of PhysReg via VRM->getPhys(Reg) (cf. the doxygen index's
// VirtRegMap::getPhys entry).
193
195 if (!PhysReg)
196 return NSA_Status::FIXED;
197
// NOTE(review): source lines 198-206 appear to have been comment lines in
// the original; their text was stripped by the scrape.
198
199
200
201
202
203
204
205
206
// Only plain 32-bit registers without subregister indices can be packed.
207 if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())
208 return NSA_Status::FIXED;
209
// NOTE(review): source lines 210-215 are missing — including the condition
// that guards the following return; confirm upstream.
210
211
212
213
214
216 return NSA_Status::FIXED;
217
218 const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
219
// A copy from the same physical register pins the value — don't move it.
220 if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
221 return NSA_Status::FIXED;
222
// Any implicit use (or, per missing line 227, some property of UseInst)
// makes re-assignment unsafe.
223 for (auto U : MRI->use_nodbg_operands(Reg)) {
224 if (U.isImplicit())
225 return NSA_Status::FIXED;
226 const MachineInstr *UseInst = U.getParent();
// NOTE(review): source line 227 — the condition on UseInst guarding this
// return — is missing.
228 return NSA_Status::FIXED;
229 }
230
// NOTE(review): source line 231 — the condition guarding this return
// (this branch closes the non-Fast path) — is missing.
232 return NSA_Status::FIXED;
233 }
234
// Record the base VGPR at I == 0; any later operand that breaks the
// VgprBase + I sequence marks the instruction as non-contiguous.
235 if (I == 0)
236 VgprBase = PhysReg;
237 else if (VgprBase + I != PhysReg)
238 NSA = true;
239 }
240
241 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
242}
243
// Main driver: collect all NSA instructions in the function, then for each
// non-contiguous one try to scavenge a contiguous VGPR run for its address
// operands, rolling back on failure.
// NOTE(review): this scrape dropped many source lines of this function
// (gaps in the fused numbering below), including several declarations the
// visible code uses (Candidates, Intervals, OrigRegs, Info, LI, Reg, ST);
// confirm all annotated gaps against upstream.
244bool GCNNSAReassignImpl::run(MachineFunction &MF) {
// NOTE(review): source lines 245-246 — presumably initializing ST from the
// subtarget and an early-out (cf. hasNSAEncoding() in the doxygen index)
// guarding this return — are missing.
247 return false;
248
// NOTE(review): source lines 249-250 (likely MRI/TRI/VRM-related setup) and
// 253/255 (the occupancy-based operands of std::min, cf. getOccupancy() /
// getMaxNumVGPRs() in the index) are missing.
251
252 const SIMachineFunctionInfo *MFI = MF.getInfo();
254 MaxNumVGPRs = std::min(
256 MaxNumVGPRs);
257 CSRegs = MRI->getCalleeSavedRegs();
258
// A candidate is an NSA instruction plus a flag: true once it is (or was
// already) contiguous.
259 using Candidate = std::pair<const MachineInstr*, bool>;
// NOTE(review): source line 260 — the declaration of the Candidates
// container used below — is missing.
261 for (const MachineBasicBlock &MBB : MF) {
262 for (const MachineInstr &MI : MBB) {
263 switch (CheckNSA(MI)) {
264 default:
265 continue;
266 case NSA_Status::CONTIGUOUS:
267 Candidates.push_back(std::pair(&MI, true));
268 break;
269 case NSA_Status::NON_CONTIGUOUS:
270 Candidates.push_back(std::pair(&MI, false));
271 ++NumNSAInstructions;
272 break;
273 }
274 }
275 }
276
// Process only the non-contiguous candidates (C.second == false).
278 for (auto &C : Candidates) {
279 if (C.second)
280 continue;
281
282 const MachineInstr *MI = C.first;
// An earlier iteration's re-assignment may already have made this one
// contiguous; the Fast re-check detects that.
283 if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
284
285 C.second = true;
286 ++NumNSAConverted;
287 continue;
288 }
289
// NOTE(review): source lines 290 and 294-295 are missing — 294-295
// presumably declare Intervals and OrigRegs, both used below.
291 int VAddr0Idx =
292 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
293
// Track the combined live range of all address operands.
296 SlotIndex MinInd, MaxInd;
297 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
298 const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
// NOTE(review): source lines 299-301 — presumably extracting Reg from Op
// and the condition (e.g. no live interval) triggering this abort — are
// missing.
302
303 Intervals.clear();
304 break;
305 }
// NOTE(review): source lines 306-307 (presumably fetching LI, the live
// interval, and recording it / its current phys reg) are missing.
308 if (LI->empty()) {
// An empty interval at I == 0 leaves MinInd/MaxInd uninitialized, hence
// the skip below (missing line 312 presumably completes this handling).
309
310
311 if (I == 0)
313 continue;
314 }
315 MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
316 MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
317 }
318
319 if (Intervals.empty())
320 continue;
321
// Debug dump of the original allocation (LLVM_DEBUG head on missing
// source line 322; printReg body on missing line 326).
323 << "\tOriginal allocation:\t";
324 for (auto *LI
325 : Intervals) dbgs()
327 dbgs() << '\n');
328
329 bool Success = scavengeRegs(Intervals);
// NOTE(review): source lines 330-331 — presumably "if (!Success) {" plus a
// comment — are missing; the block below is the failure/rollback path.
332 if (VRM->hasPhys(Intervals.back()->reg()))
333 continue;
334 } else {
335
// On failure, scan earlier candidates whose ranges overlap [MinInd,
// MaxInd] and flag conversion conflicts (lower_bound locates the first
// candidate at or after MinInd by slot index).
336 auto *I =
337 std::lower_bound(Candidates.begin(), &C, MinInd,
338 [this](const Candidate &C, SlotIndex I) {
339 return LIS->getInstructionIndex(*C.first) < I;
340 });
341 for (auto *E = Candidates.end();
// NOTE(review): source line 342 — the loop condition (presumably bounding
// by E and MaxInd) — is missing.
343 ++I) {
344 if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
// NOTE(review): source line 345 (presumably resetting I->second) is
// missing.
346 LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
347 }
348 }
349 }
350
// Rollback: restore every interval to its original physical register.
// NOTE(review): source lines 351 and 354 — presumably the unassign of any
// current mapping — are missing.
352 for (unsigned I = 0; I < Info->VAddrOperands; ++I)
353 if (VRM->hasPhys(Intervals[I]->reg()))
355
356 for (unsigned I = 0; I < Info->VAddrOperands; ++I)
357 LRM->assign(*Intervals[I], OrigRegs[I]);
358
359 continue;
360 }
361
// Success: mark converted and bump the statistic; lines 364/366/368/370
// around the debug dump of the new allocation are missing.
362 C.second = true;
363 ++NumNSAConverted;
365 dbgs() << "\tNew allocation:\t\t ["
367 << " : "
369 << "]\n");
371 }
372
// NOTE(review): source line 373 — the function's return statement — is
// missing from the capture.
374}
375
// Legacy-PM wrapper: fetch the three required analyses and delegate to the
// shared implementation class.
376bool GCNNSAReassignLegacy::runOnMachineFunction(MachineFunction &MF) {
// NOTE(review): the template arguments of the three getAnalysis<...>()
// calls below were eaten by the HTML rendering; the getVRM()/getLRM()/
// getLIS() accessors indicate the VirtRegMap, LiveRegMatrix and
// LiveIntervals wrapper passes — confirm upstream.
377 auto *VRM = &getAnalysis().getVRM();
378 auto *LRM = &getAnalysis().getLRM();
379 auto *LIS = &getAnalysis().getLIS();
380
381 GCNNSAReassignImpl Impl(VRM, LRM, LIS);
382 return Impl.run(MF);
383}
384
// New pass manager entry point (per the doxygen index appended to this
// capture: GCNNSAReassignPass::run, defined at source line 386).
// NOTE(review): source lines 386-390 — the rest of the signature and the
// VRM/LRM/LIS analysis lookups taken by reference below — are missing from
// the scrape.
385PreservedAnalyses
391
392 GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);
393 Impl.run(MF);
// NOTE(review): source line 394 — the returned PreservedAnalyses value —
// is missing from the capture.
395}
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition GCNNSAReassign.cpp:386
bool hasNonNSAEncoding() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasNSAEncoding() const
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
unsigned getOccupancy() const
unsigned getDynamicVGPRBlockSize() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
Register getPreSplitReg(Register virtReg) const
returns the live interval virtReg is split from.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
bool isAssignedReg(Register virtReg) const
returns true if the specified virtual register is not mapped to a stack slot or rematerialized.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
char & GCNNSAReassignID
Definition GCNNSAReassign.cpp:113
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
void initializeGCNNSAReassignLegacyPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.