LLVM: lib/Target/AMDGPU/SILowerSGPRSpills.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
28
29using namespace llvm;
30
31#define DEBUG_TYPE "si-lower-sgpr-spills"
32
34
35namespace {
36
38 "amdgpu-num-vgprs-for-wwm-alloc",
39 cl::desc("Max num VGPRs for whole-wave register allocation."),
41
// Implementation class for the SGPR spill lowering. It is constructed with
// three analysis pointers (LIS, Indexes, MDT) and driven through run(MF); see
// the construction in SILowerSGPRSpillsLegacy::runOnMachineFunction.
// NOTE(review): this listing is missing the member declarations and most of
// the method declarations of the class; only the fragments below survive.
42class SILowerSGPRSpills {
43private:
49
50
51
54
55public:
// Constructor initializer list: stores the three analysis pointers verbatim.
// Callers in this file pass nullptr for LIS / Indexes when unavailable.
58 : LIS(LIS), Indexes(Indexes), MDT(MDT) {}
63 void updateLaneVGPRDomInstr(
67};
68
70public:
71 static char ID;
72
74
76
77 void getAnalysisUsage(AnalysisUsage &AU) const override {
81 }
82
84
86 }
87};
88
89}
90
91char SILowerSGPRSpillsLegacy::ID = 0;
92
94 "SI lower SGPR spill instructions", false, false)
99 "SI lower SGPR spill instructions", false, false)
100
102
106 if (MBB.isLiveIn(*R)) {
107 return true;
108 }
109 }
110 return false;
111}
112
113
122
126
128
131 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
132
133
134
135
136
138 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
140
141 if (Indexes) {
145 }
146
147 if (LIS)
149 }
150 } else {
151
152 if (Indexes)
154
155 if (LIS)
158 }
159}
160
161
169
170
173 I == RestoreBlock.begin() ? I : std::prev(I);
174
175
178
179
181
182 if (Indexes) {
185 }
186
187 if (LIS)
189 }
190 } else {
191
192 if (Indexes)
195
196 if (LIS)
199 }
200}
201
202
203
// Fill the SaveBlocks / RestoreBlocks lists for MF.
//
// The visible code has two paths:
//  * an early-return path that takes the first entry of the frame info's save
//    points, pushes its block onto SaveBlocks, pushes a single RestoreBlock
//    onto RestoreBlocks and returns;
//  * a fallback path that walks every basic block in MF.
// NOTE(review): the guarding conditions, the asserts that own the two message
// strings, the definition of RestorePoint and the body of the final loop are
// missing from this listing, so the exact selection criteria cannot be stated
// from here.
204void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
205 const MachineFrameInfo &MFI = MF.getFrameInfo();
206
207
208
209
210
211
// Message text indicates that only a single save point is handled.
214 "Multiple save points not yet supported!");
215 const auto &SavePoint = *MFI.getSavePoints().begin();
216 SaveBlocks.push_back(SavePoint.first);
// Message text indicates that only a single restore point is handled.
218 "Multiple restore points not yet supported!");
220 MachineBasicBlock *RestoreBlock = RestorePoint.first;
221
222
223
225 RestoreBlocks.push_back(RestoreBlock);
226 return;
227 }
228
229
// Fallback: visit every block of the function (loop body not visible here).
231 for (MachineBasicBlock &MBB : MF) {
236 }
237}
238
239
240
243
245 EntryBB.addLiveIn(CSIReg.getReg());
247}
248
// Create stack slots for the callee-saved registers selected by
// determineCalleeSavesSGPR and record their frame indices.
//
// \param MF              function being processed.
// \param CalleeSavedFIs  output list; receives one frame index (JunkFI) per
//                        callee-saved register found in SavedRegs.
// \returns true when at least one CalleeSavedInfo entry was created (CSI is
//          non-empty), false otherwise.
// NOTE(review): several lines are missing from this listing (definitions of
// MRI / TRI / JunkFI, and the calls made inside the SaveBlocks / RestoreBlocks
// loops), so the spill/restore insertion itself is not visible here.
249bool SILowerSGPRSpills::spillCalleeSavedRegs(
250 MachineFunction &MF, SmallVectorImpl &CalleeSavedFIs) {
253 const GCNSubtarget &ST = MF.getSubtarget();
254 const SIFrameLowering *TFI = ST.getFrameLowering();
// No register scavenger is supplied to determineCalleeSavesSGPR.
256 RegScavenger *RS = nullptr;
257
258
259 BitVector SavedRegs;
260 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
261
262
// NOTE(review): the receiver (and presumably a negation) of this attribute
// check was lost in extraction; it tests the Naked function attribute.
263 if (.hasFnAttribute(Attribute::Naked)) {
264
265
267
268 std::vector CSI;
269 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
270
// The loop stops at the first zero entry of CSRegs (zero-terminated list).
271 for (unsigned I = 0; CSRegs[I]; ++I) {
272 MCRegister Reg = CSRegs[I];
273
// Only registers flagged in SavedRegs get a slot and a CSI entry.
274 if (SavedRegs.test(Reg)) {
275 const TargetRegisterClass *RC =
276 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
278 TRI->getSpillAlign(*RC), true);
279
280 CSI.emplace_back(Reg, JunkFI);
281 CalleeSavedFIs.push_back(JunkFI);
282 }
283 }
284
285 if (!CSI.empty()) {
286 for (MachineBasicBlock *SaveBlock : SaveBlocks)
288
289
// Exactly one save block is expected at this point.
290 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
292
293 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
295 return true;
296 }
297 }
298
299 return false;
300}
301
// Record or refine, in LaneVGPRDomInstr, the insertion point associated with
// each lane VGPR that appears in VGPRSpills.
//
// \param LaneVGPRDomInstr  map from lane VGPR to an instruction iterator;
//                          updated in place.
// NOTE(review): the leading parameters and the definitions of FuncInfo,
// VGPRSpills and PrevLaneVGPR are missing from this listing. The call site in
// run() passes (FI, &MBB, MIS.begin(), LaneVGPRDomInstr), which matches the
// MBB and InsertPt names used below.
302void SILowerSGPRSpills::updateLaneVGPRDomInstr(
304 DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
305
306
307
308
309
310
311 SIMachineFunctionInfo *FuncInfo =
316 for (auto &Spill : VGPRSpills) {
// Consecutive spills that use the same VGPR are handled only once.
317 if (PrevLaneVGPR == Spill.VGPR)
318 continue;
319
320 PrevLaneVGPR = Spill.VGPR;
321 auto I = LaneVGPRDomInstr.find(Spill.VGPR);
// First sighting of this VGPR (lane 0, no map entry yet): seed the map with
// the current insertion point.
322 if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
323
324 LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
325 } else {
// NOTE(review): I is dereferenced here; this relies on I being a valid entry
// (a guarding line appears to be missing from this listing).
327 auto PrevInsertPt = I->second;
328 MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
// Same block as the recorded point: keep whichever instruction dominates the
// other, as reported by MDT->dominates.
329 if (DomMBB == MBB) {
330
331
332
333
334 if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
335 I->second = InsertPt;
336
337 continue;
338 }
339
340
341
// NOTE(review): DomMBB is presumably reassigned on a line missing from this
// listing (the page's API index mentions findNearestCommonDominator);
// otherwise this comparison could not hold after the branch above.
343 if (DomMBB == MBB)
344 I->second = InsertPt;
345 else if (DomMBB != PrevInsertPt->getParent())
347 }
348 }
349}
350
// Select VGPRs for whole-wave-mode allocation and mark them (with their
// super-registers) in RegMask.
//
// \param MF       function being processed.
// \param RegMask  output bit vector; bits are set via TRI->markSuperRegs.
// NOTE(review): the definitions of TRI / MRI and the error-reporting call that
// owns the message string near the end are missing from this listing.
351void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
352 BitVector &RegMask) {
353
354
355 SIMachineFunctionInfo *MFI = MF.getInfo();
357 BitVector ReservedRegs = TRI->getReservedRegs(MF);
358 BitVector NonWwmAllocMask(TRI->getNumRegs());
359 const GCNSubtarget &ST = MF.getSubtarget();
360
361
362
363
// Number of registers wanted: the smaller of the command-line limit and the
// number of VGPRs currently used for SGPR spills.
364 unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
365 NumRegs =
366 std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
367
368 auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
369
370
// Scan downward from the highest VGPR allowed for this function until NumRegs
// registers have been picked or VGPR0 has been examined.
371 unsigned I = 0;
372 for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
373 (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
// NOTE(review): the receiver (and presumably a negation) of isPhysRegUsed was
// lost in extraction; reserved registers are skipped in any case.
374 if (!ReservedRegs.test(Reg) &&
375 .isPhysRegUsed(Reg, true)) {
376 TRI->markSuperRegs(RegMask, Reg);
377 ++I;
378 }
379 }
380
// Not enough registers found: VGPR0 is marked and the message below is used
// (the call that consumes the message is not visible here).
381 if (I != NumRegs) {
382
383 TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
385 "cannot find enough VGPRs for wwm-regalloc");
386 }
387}
388
// Legacy pass-manager entry point: gathers the analyses and delegates to
// SILowerSGPRSpills::run, returning its result.
//
// LIS and Indexes are optional: each is nullptr when its wrapper analysis is
// not available. MDT is obtained through getAnalysis.
// NOTE(review): the template arguments of getAnalysisIfAvailable / getAnalysis
// were stripped from this listing.
389bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
390 auto *LISWrapper = getAnalysisIfAvailable();
391 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
392 auto *SIWrapper = getAnalysisIfAvailable();
393 SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
394 MachineDominatorTree *MDT =
395 &getAnalysis().getDomTree();
396 return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
397}
398
// Main driver of the lowering for one machine function.
//
// Visible steps, in order:
//  1. compute SaveBlocks / RestoreBlocks and spill callee-saved registers;
//  2. when SGPR-to-VGPR spilling applies, rewrite SGPR spill instructions via
//     TRI->eliminateSGPRToVGPRSpillFrameIndex, recording spilled frame indices
//     in SpillFIs and insertion points in LaneVGPRDomInstr;
//  3. compute the WWM / non-WWM register masks;
//  4. reset debug-value operands that still refer to a spilled frame index;
//  5. when virtual VGPR lanes were used, look for an unused wave-mask SGPR.
// SaveBlocks and RestoreBlocks are cleared on every visible exit path.
//
// \returns MadeChange on the normal path; false on the early-exit path.
// NOTE(review): many lines (definitions of TII / TRI / MRI / SpillFIs, several
// conditions, loop headers and calls) are missing from this listing; comments
// below describe only what the surviving lines show.
399bool SILowerSGPRSpills::run(MachineFunction &MF) {
400 const GCNSubtarget &ST = MF.getSubtarget();
403
405
406
407
408 calculateSaveRestoreBlocks(MF);
409 SmallVector CalleeSavedFIs;
410 bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
411
414 SIMachineFunctionInfo *FuncInfo = MF.getInfo();
415
// Early exit (its condition is not visible here): nothing was changed.
417 SaveBlocks.clear();
418 RestoreBlocks.clear();
419 return false;
420 }
421
422 bool MadeChange = false;
423 bool SpilledToVirtVGPRLanes = false;
424
425
426
427 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
429 if (HasSGPRSpillToVGPR) {
430
431
432
433
434
435
436
438
439
// Filled by updateLaneVGPRDomInstr for spills that go to virtual VGPR lanes.
440 DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;
441
442 for (MachineBasicBlock &MBB : MF) {
// NOTE(review): the per-instruction loop header and the receiver of
// isSGPRSpill are missing from this listing; non-SGPR-spill instructions are
// presumably skipped here.
444 if (->isSGPRSpill(MI))
445 continue;
446
// A spill whose operand 0 is undef is simply erased.
447 if (MI.getOperand(0).isUndef()) {
448 if (Indexes)
450 MI.eraseFromParent();
451 continue;
452 }
453
454 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
456
// Callee-save spills take the variant of the calls that passes an extra
// `true` argument; its failure message refers to physical VGPR lanes.
458 if (IsCalleeSaveSGPRSpill) {
459
460
461
462
463
464
465
466
467
468
470 MF, FI, true)) {
471 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
472 MI, FI, nullptr, Indexes, LIS, true);
473 if (!Spilled)
475 "failed to spill SGPR to physical VGPR lane when allocated");
476 }
477 } else {
// Other spills: the failure message refers to virtual VGPR lanes. MIS is
// created before the rewrite and MIS.begin() is handed to
// updateLaneVGPRDomInstr afterwards.
478 MachineInstrSpan MIS(&MI, &MBB);
480 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
481 MI, FI, nullptr, Indexes, LIS);
482 if (!Spilled)
484 "failed to spill SGPR to virtual VGPR lane when allocated");
485 SpillFIs.set(FI);
486 updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
487 SpilledToVirtVGPRLanes = true;
488 }
489 }
490 }
491 }
492
// Per-register step using the recorded insertion point (the enclosing loop
// header and the instruction being built are not visible here).
494 auto InsertPt = LaneVGPRDomInstr[Reg];
495
496 MachineBasicBlock &Block = *InsertPt->getParent();
498 auto MIB =
500
501
503
504
506 if (LIS) {
509 }
510 }
511
512
513
515 BitVector WwmRegMask(TRI->getNumRegs());
516
517 determineRegsForWWMAllocation(MF, WwmRegMask);
518
// Non-WWM mask: complement of the WWM mask, then cleared outside the bits of
// TRI->getAllVGPRRegMask().
519 BitVector NonWwmRegMask(WwmRegMask);
520 NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());
521
522
523
525 }
526
527 for (MachineBasicBlock &MBB : MF) {
528
529
530
531
532 for (MachineInstr &MI : MBB) {
533 if (MI.isDebugValue()) {
// The stack operand sits at index 2 for debug-value lists and 0 otherwise.
534 uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
// A frame-index operand whose index is set in SpillFIs is replaced by an
// empty Register().
535 if (MI.getOperand(StackOperandIdx).isFI() &&
537 MI.getOperand(StackOperandIdx).getIndex()) &&
538 SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
539 MI.getOperand(StackOperandIdx)
540 .ChangeToRegister(Register(), false );
541 }
542 }
543 }
544 }
545
546
547
548
549
550
552
553 MadeChange = true;
554 }
555
556 if (SpilledToVirtVGPRLanes) {
557 const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
558
559
560
// Look for an unused register of the wave-mask class; the comparison on its
// hardware index and both branch bodies are not visible here.
561 Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
562 if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
565 } else {
566
567
569 }
570
571 SaveBlocks.clear();
572 RestoreBlocks.clear();
573
574 return MadeChange;
575}
576
577PreservedAnalyses
584 SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
586}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, std::vector< CalleeSavedInfo > &CSI)
Insert restore code for the callee-saved registers used in the function.
SmallVector< MachineBasicBlock *, 4 > MBBVector
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI)
Insert spill code for the callee-saved registers used in the function.
static void updateLiveness(MachineFunction &MF)
Helper function to update the liveness information for the callee-saved registers.
This file declares the machine register scavenger class.
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, const TargetRegisterInfo *TRI)
Definition SILowerSGPRSpills.cpp:103
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, MutableArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert restore code for the callee-saved registers used in the function.
Definition SILowerSGPRSpills.cpp:162
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert spill code for the callee-saved registers used in the function.
Definition SILowerSGPRSpills.cpp:114
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool test(unsigned Idx) const
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
iterator find(const_arg_type_t< KeyT > Val)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
An RAII based helper class to modify MachineFunctionProperties when running pass.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
LLVM_ABI void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &)
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setCalleeSavedInfoValid(bool v)
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
const SaveRestorePoints & getRestorePoints() const
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const SaveRestorePoints & getSavePoints() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
Representation of each machine instruction.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition SILowerSGPRSpills.cpp:578
void setSGPRForEXECCopy(Register Reg)
void setFlag(Register Reg, uint8_t Flag)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
Register getSGPRForEXECCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void updateNonWWMRegMask(BitVector &RegMask)
bool hasSpilledSGPRs() const
ArrayRef< Register > getSGPRSpillVGPRs() const
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
LLVM_ABI void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)
Removes machine instruction (bundle) MI from the mapping.
LLVM_ABI void repairIndexesInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End)
Repair indexes after adding and removing instructions.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Information about stack frame layout on the target.
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
TargetInstrInfo - Interface to description of machine instruction set.
const TargetRegisterInfo & getRegisterInfo() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
auto reverse(ContainerTy &&C)
char & SILowerSGPRSpillsLegacyID
Definition SILowerSGPRSpills.cpp:101
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.