LLVM: lib/Target/AMDGPU/SIFormMemoryClauses.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
22
23using namespace llvm;
24
25#define DEBUG_TYPE "si-form-memory-clauses"
26
27
28
31 cl::desc("Maximum length of a memory clause, instructions"));
32
33namespace {
34
// Implementation object for the memory-clause forming transformation. Both
// the legacy pass wrapper and the PreservedAnalyses-returning entry point in
// this file construct one from a LiveIntervals and call run() on it.
// NOTE(review): several member declarations (e.g. RegUse, LIS, checkPressure
// and the tail of processRegUses' parameter list) are not visible in this
// listing; they appear to have been lost in extraction.
35class SIFormMemoryClausesImpl {
37
// Returns false when MI conflicts with the registers already recorded in
// Defs / Uses (see the definition for the individual rejection rules).
38 bool canBundle(const MachineInstr &MI, const RegUse &Defs,
39 const RegUse &Uses) const;
// Merges MI's register operands into Defs (definitions) and Uses (reads).
41 void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
// Runs canBundle, then checkPressure, then collectRegUses for MI.
42 bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
44
50
// Occupancy stored by checkPressure the last time it accepted an instruction.
51 unsigned LastRecordedOccupancy;
// Number of allocatable VGPR_32 registers, computed in run().
52 unsigned MaxVGPRs;
// Number of allocatable SGPR_32 registers, computed in run().
53 unsigned MaxSGPRs;
54
55public:
// Stores the given LiveIntervals pointer in LIS.
56 SIFormMemoryClausesImpl(LiveIntervals *LS) : LIS(LS) {}
// Runs the transformation over MF.
// NOTE(review): presumably returns whether MF was changed - confirm.
57 bool run(MachineFunction &MF);
58};
59
61public:
62 static char ID;
63
// Constructs the legacy pass, forwarding the static pass ID to the
// MachineFunctionPass base.
// NOTE(review): the constructor body line is not visible in this listing.
64 SIFormMemoryClausesLegacy() : MachineFunctionPass(ID) {
66 }
67
68 bool runOnMachineFunction(MachineFunction &MF) override;
69
70 StringRef getPassName() const override {
71 return "SI Form memory clauses";
72 }
73
// Declares the analyses this pass depends on.
// NOTE(review): the template argument of addRequired and the lines that
// followed it are not visible in this listing. runOnMachineFunction obtains a
// LiveIntervals through getLIS(), so that analysis is presumably the one
// required here - confirm against the original source.
74 void getAnalysisUsage(AnalysisUsage &AU) const override {
75 AU.addRequired();
78 }
79
80 MachineFunctionProperties getClearedProperties() const override {
81 return MachineFunctionProperties().setIsSSA();
82 }
83};
84
85}
86
88 "SI Form memory clauses", false, false)
92
93char SIFormMemoryClausesLegacy::ID = 0;
94
96
98 return new SIFormMemoryClausesLegacy();
99}
100
104
108
109
110
112 assert(.isDebugInstr() && "debug instructions should not reach here");
113 if (MI.isBundled())
114 return false;
115 if (.mayLoad() || MI.mayStore())
116 return false;
118 return false;
120 return false;
122 return false;
123
125 Register ResReg = ResMO.getReg();
127 if (MO.getReg() == ResReg)
128 return false;
129 }
130 break;
131 }
132 return true;
133}
134
136 unsigned S = 0;
149 return S;
150}
151
152
153
// Decides whether MI may be added to the clause described by Defs and Uses.
//
// Defs / Uses map a register to a pair whose second member is a lane mask of
// the lanes already written / read by instructions accepted so far.
// Returns false as soon as any operand of MI rules the instruction out, and
// true when every operand has been checked without finding a conflict.
//
// NOTE(review): the declaration of Reg and the condition guarding the
// `return false` that follows the Map.end() check are not visible in this
// listing; they appear to have been lost in extraction.
154bool SIFormMemoryClausesImpl::canBundle(const MachineInstr &MI,
155 const RegUse &Defs,
156 const RegUse &Uses) const {
157
158 for (const MachineOperand &MO : MI.operands()) {
159
160
// Any frame-index operand disqualifies the instruction.
161 if (MO.isFI())
162 return false;
163
// Only register operands can conflict with the recorded maps.
164 if (!MO.isReg())
165 continue;
166
168
169
// Tied operands disqualify the instruction.
170 if (MO.isTied())
171 return false;
172
// A definition is checked against earlier uses, a use against earlier
// definitions.
173 const RegUse &Map = MO.isDef() ? Uses : Defs;
174 auto Conflict = Map.find(Reg);
175 if (Conflict == Map.end())
176 continue;
177
179 return false;
180
// The register was seen before: reject only if the lanes touched by this
// operand overlap the lanes recorded for it.
181 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
182 if ((Conflict->second.second & Mask).any())
183 return false;
184 }
185
186 return true;
187}
188
189
190
191
// Checks whether the register pressure reached with MI is acceptable.
//
// Computes the occupancy of MaxPressure for the subtarget, using the dynamic
// VGPR block size taken from the function info of MI's parent function. When
// the acceptance condition holds, the occupancy is stored in
// LastRecordedOccupancy and true is returned; otherwise returns false and
// leaves LastRecordedOccupancy unchanged.
//
// NOTE(review): the statements that advance RPT and define MaxPressure, and
// the leading part of the acceptance condition, are not visible in this
// listing. The only visible conjunct requires the SGPR count to be at most
// half of MaxSGPRs.
192bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
193 GCNDownwardRPTracker &RPT) {
194
195
196
197
200 unsigned Occupancy = MaxPressure.getOccupancy(
201 *ST,
202 MI.getMF()->getInfo()->getDynamicVGPRBlockSize());
203
204
205
206
207
208
209
210
211
212
215 MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
// Remember the occupancy of the state that was just accepted.
216 LastRecordedOccupancy = Occupancy;
217 return true;
218 }
219 return false;
220}
221
222
// Records the register operands of MI in Defs (for definitions) and Uses
// (for reads). Each map entry holds a pair of operand state bits and a lane
// mask; repeated registers have both members OR-ed together.
//
// NOTE(review): the declarations of Reg, State and Mask, the condition of the
// second `continue`, and the alternative branch of the lane-mask expression
// are not visible in this listing.
223void SIFormMemoryClausesImpl::collectRegUses(const MachineInstr &MI,
224 RegUse &Defs, RegUse &Uses) const {
225 for (const MachineOperand &MO : MI.operands()) {
// Non-register operands carry nothing to record.
226 if (!MO.isReg())
227 continue;
229 if ()
230 continue;
231
233 ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
235 RegUse &Map = MO.isDef() ? Defs : Uses;
236
// Insert a fresh entry, or merge into the one already present.
238 auto [Loc, Inserted] = Map.try_emplace(Reg, State, Mask);
239 if (!Inserted) {
240 Loc->second.first |= State;
241 Loc->second.second |= Mask;
242 }
243 }
244}
245
246
247
248
249bool SIFormMemoryClausesImpl::processRegUses(const MachineInstr &MI,
250 RegUse &Defs, RegUse &Uses,
251 GCNDownwardRPTracker &RPT) {
252 if (!canBundle(MI, Defs, Uses))
253 return false;
254
255 if (!checkPressure(MI, RPT))
256 return false;
257
258 collectRegUses(MI, Defs, Uses);
259 return true;
260}
261
// Runs memory-clause formation over MF.
//
// Steps that are visible in this listing:
//  * caches the number of allocatable VGPR_32 / SGPR_32 registers in
//    MaxVGPRs / MaxSGPRs;
//  * for every basic block, creates a GCNDownwardRPTracker over LIS and
//    skips meta instructions;
//  * seeds fresh Defs / Uses maps with a candidate instruction through
//    processRegUses, resetting the tracker and moving on if it is rejected;
//  * extends the clause over following instructions, skipping meta
//    instructions, until processRegUses rejects one, remembering the last
//    accepted instruction in LastClauseInst;
//  * for registers in Uses that stop being live at ClauseLiveOutIdx (whole
//    register or individual subranges), collects kill operands and emits an
//    instruction with BuildMI right after LastClauseInst, registering it in
//    the slot index maps.
//
// NOTE(review): many lines are missing from this listing (the early-exit
// condition, loop headers, the clause-length handling, the code inside the
// final Defs / Uses loops and the return statement), so this summary covers
// only what can be read here.
262bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
265 return false;
266
270 MFI = MF.getInfo();
273
// Register budgets later consulted by checkPressure.
274 MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
275 MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
277 "amdgpu-max-memory-clause", MaxClause);
278
279 for (MachineBasicBlock &MBB : MF) {
280 GCNDownwardRPTracker RPT(*LIS);
285
286 if (MI.isMetaInstruction())
287 continue;
288
290
292 continue;
293
296 else {
299 }
300
// Start a new candidate clause with empty register maps.
302 RegUse Defs, Uses;
303 if (!processRegUses(MI, Defs, Uses, RPT)) {
304 RPT.reset(MI, &LiveRegsCopy);
305 continue;
306 }
307
311
312 if (Next->isMetaInstruction())
313 continue;
314
316 break;
317
318
319
320
321 if (!processRegUses(*Next, Defs, Uses, RPT))
322 break;
323
324 LastClauseInst = Next;
326 }
328 RPT.reset(MI, &LiveRegsCopy);
329 continue;
330 }
331
334
335 assert(!LastClauseInst->isMetaInstruction());
336
338 SlotIndex ClauseLiveOutIdx =
340
341
342 MachineInstrBuilder Kill;
343
344
345
346 for (auto &&R : Uses) {
349 continue;
350
351
353 const LiveInterval &LI = LIS->getInterval(R.first);
354
356 if (!LI.liveAt(ClauseLiveOutIdx)) {
358 AMDGPU::NoSubRegister);
359 }
360 } else {
// Accumulate the lanes of subranges that are live on entry to the clause
// but no longer live after it.
361 LaneBitmask KilledMask;
362 for (const LiveInterval::SubRange &SR : LI.subranges()) {
363 if (SR.liveAt(ClauseLiveInIdx) && !SR.liveAt(ClauseLiveOutIdx))
364 KilledMask |= SR.LaneMask;
365 }
366
367 if (KilledMask.none())
368 continue;
369
370 SmallVector KilledIndexes;
371 bool Success = TRI->getCoveringSubRegIndexes(
372 MRI->getRegClass(Reg), KilledMask, KilledIndexes);
374 assert(Success && "Failed to find subregister mask to cover lanes");
375 for (unsigned SubReg : KilledIndexes) {
377 }
378 }
379
380 if (KillOps.empty())
381 continue;
382
383
384
385
386
387
388 Kill = BuildMI(*MI.getParent(), std::next(LastClauseInst),
390 for (auto &Op : KillOps)
391 Kill.addUse(Reg, std::get<0>(Op), std::get<1>(Op));
392 Ind->insertMachineInstrInMaps(*Kill);
393 }
394
395
396 RPT.reset(MI, &LiveRegsCopy);
397
// Nothing more to do for this clause when no kill instruction was built.
398 if (!Kill)
399 continue;
400
401 for (auto &&R : Defs) {
405 continue;
408 }
409
410 for (auto &&R : Uses) {
413 continue;
416 }
417 }
418 }
419
421}
422
// Legacy pass-manager entry point: obtains the LiveIntervals from the
// required analysis and delegates to SIFormMemoryClausesImpl::run, returning
// its result.
// NOTE(review): the condition guarding the early `return false` and the
// template argument of getAnalysis are not visible in this listing.
423bool SIFormMemoryClausesLegacy::runOnMachineFunction(MachineFunction &MF) {
425 return false;
426
427 LiveIntervals *LIS = &getAnalysis().getLIS();
428 return SIFormMemoryClausesImpl(LIS).run(MF);
429}
430
431PreservedAnalyses
435 SIFormMemoryClausesImpl(&LIS).run(MF);
437}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping number of S...
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static cl::opt< unsigned > MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15), cl::desc("Maximum length of a memory clause, instructions"))
static bool isVMEMClauseInst(const MachineInstr &MI)
Definition SIFormMemoryClauses.cpp:101
static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause)
Definition SIFormMemoryClauses.cpp:111
static unsigned getMopState(const MachineOperand &MO)
Definition SIFormMemoryClauses.cpp:135
static bool isSMEMClauseInst(const MachineInstr &MI)
Definition SIFormMemoryClauses.cpp:105
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
FunctionPass class - This class is used to implement most global optimizations.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
GCNRegPressure moveMaxPressure()
return MaxPressure and clear it.
bool advanceBeforeNext(MachineInstr *MI=nullptr, bool UseInternalIterator=true)
Move to the state right before the next MI or after the end of MBB.
bool advance(MachineInstr *MI=nullptr, bool UseInternalIterator=true)
Move to the state at the next MI.
MachineBasicBlock::const_iterator getNext() const
bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs=nullptr)
Reset tracker to the point before the MI filling LiveRegs upon this point using LIS.
void advanceToNext(MachineInstr *MI=nullptr, bool UseInternalIterator=true)
Move to the state at the MI, advanceBeforeNext has to be called first.
const decltype(LiveRegs) & getLiveRegs() const
DenseMap< unsigned, LaneBitmask > LiveRegSet
bool hasGFX90AInsts() const
const SIInstrInfo * getInstrInfo() const override
const SIRegisterInfo * getRegisterInfo() const override
bool isXNACKEnabled() const
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
bool liveAt(SlotIndex index) const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
instr_iterator instr_begin()
MachineInstrBundleIterator< const MachineInstr > const_iterator
Instructions::iterator instr_iterator
instr_iterator instr_end()
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool isValid() const
Check for null.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition SIFormMemoryClauses.cpp:432
static bool isVMEM(const MachineInstr &MI)
static bool isSMRD(const MachineInstr &MI)
static bool isAtomic(const MachineInstr &MI)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getMinAllowedOccupancy() const
void limitOccupancy(const MachineFunction &MF)
SlotIndex getNextIndex() const
Returns the next index.
reference emplace_back(ArgTypes &&... Args)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Renamable
Register that may be renamed.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ EarlyClobber
Register definition happens before uses.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIFormMemoryClausesLegacyPass()
Definition SIFormMemoryClauses.cpp:97
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
char & SIFormMemoryClausesID
Definition SIFormMemoryClauses.cpp:95
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
void initializeSIFormMemoryClausesLegacyPass(PassRegistry &)
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
unsigned getVGPRNum(bool UnifiedVGPRFile) const
unsigned getOccupancy(const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize) const
unsigned getSGPRNum() const
static constexpr LaneBitmask getAll()
constexpr bool none() const