LLVM: lib/Target/AMDGPU/SILowerSGPRSpills.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

28

29using namespace llvm;

30

31#define DEBUG_TYPE "si-lower-sgpr-spills"

32

34

35namespace {

36

38 "amdgpu-num-vgprs-for-wwm-alloc",

39 cl::desc("Max num VGPRs for whole-wave register allocation."),

41

42class SILowerSGPRSpills {

43private:

49

50

51

54

55public:

58 : LIS(LIS), Indexes(Indexes), MDT(MDT) {}

63 void updateLaneVGPRDomInstr(

67};

68

70public:

71 static char ID;

72

74

76

77 void getAnalysisUsage(AnalysisUsage &AU) const override {

81 }

82

84

86 }

87};

88

89}

90

91char SILowerSGPRSpillsLegacy::ID = 0;

92

94 "SI lower SGPR spill instructions", false, false)

99 "SI lower SGPR spill instructions", false, false)

100

102

106 if (MBB.isLiveIn(*R)) {

107 return true;

108 }

109 }

110 return false;

111}

112

113

122

126

128

131 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

132

133

134

135

136

138 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),

140

141 if (Indexes) {

145 }

146

147 if (LIS)

149 }

150 } else {

151

152 if (Indexes)

154

155 if (LIS)

158 }

159}

160

161

169

170

173 I == RestoreBlock.begin() ? I : std::prev(I);

174

175

178

179

181

182 if (Indexes) {

185 }

186

187 if (LIS)

189 }

190 } else {

191

192 if (Indexes)

195

196 if (LIS)

199 }

200}

201

202

203

204void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {

205 const MachineFrameInfo &MFI = MF.getFrameInfo();

206

207

208

209

210

211

214 "Multiple save points not yet supported!");

215 const auto &SavePoint = *MFI.getSavePoints().begin();

216 SaveBlocks.push_back(SavePoint.first);

218 "Multiple restore points not yet supported!");

220 MachineBasicBlock *RestoreBlock = RestorePoint.first;

221

222

223

225 RestoreBlocks.push_back(RestoreBlock);

226 return;

227 }

228

229

231 for (MachineBasicBlock &MBB : MF) {

236 }

237}

238

239

240

243

245 EntryBB.addLiveIn(CSIReg.getReg());

247}

248

249bool SILowerSGPRSpills::spillCalleeSavedRegs(

250 MachineFunction &MF, SmallVectorImpl &CalleeSavedFIs) {

253 const GCNSubtarget &ST = MF.getSubtarget();

254 const SIFrameLowering *TFI = ST.getFrameLowering();

256 RegScavenger *RS = nullptr;

257

258

259 BitVector SavedRegs;

260 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);

261

262

263 if (F.hasFnAttribute(Attribute::Naked)) {

264

265

267

268 std::vector CSI;

269 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

270

271 for (unsigned I = 0; CSRegs[I]; ++I) {

272 MCRegister Reg = CSRegs[I];

273

274 if (SavedRegs.test(Reg)) {

275 const TargetRegisterClass *RC =

276 TRI->getMinimalPhysRegClass(Reg, MVT::i32);

278 TRI->getSpillAlign(*RC), true);

279

280 CSI.emplace_back(Reg, JunkFI);

281 CalleeSavedFIs.push_back(JunkFI);

282 }

283 }

284

285 if (!CSI.empty()) {

286 for (MachineBasicBlock *SaveBlock : SaveBlocks)

288

289

290 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");

292

293 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)

295 return true;

296 }

297 }

298

299 return false;

300}

301

302void SILowerSGPRSpills::updateLaneVGPRDomInstr(

304 DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {

305

306

307

308

309

310

311 SIMachineFunctionInfo *FuncInfo =

316 for (auto &Spill : VGPRSpills) {

317 if (PrevLaneVGPR == Spill.VGPR)

318 continue;

319

320 PrevLaneVGPR = Spill.VGPR;

321 auto I = LaneVGPRDomInstr.find(Spill.VGPR);

322 if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {

323

324 LaneVGPRDomInstr[Spill.VGPR] = InsertPt;

325 } else {

327 auto PrevInsertPt = I->second;

328 MachineBasicBlock *DomMBB = PrevInsertPt->getParent();

329 if (DomMBB == MBB) {

330

331

332

333

334 if (MDT->dominates(&*InsertPt, &*PrevInsertPt))

335 I->second = InsertPt;

336

337 continue;

338 }

339

340

341

343 if (DomMBB == MBB)

344 I->second = InsertPt;

345 else if (DomMBB != PrevInsertPt->getParent())

347 }

348 }

349}

350

351void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,

352 BitVector &RegMask) {

353

354

355 SIMachineFunctionInfo *MFI = MF.getInfo();

357 BitVector ReservedRegs = TRI->getReservedRegs(MF);

358 BitVector NonWwmAllocMask(TRI->getNumRegs());

359 const GCNSubtarget &ST = MF.getSubtarget();

360

361

362

363

364 unsigned NumRegs = MaxNumVGPRsForWwmAllocation;

365 NumRegs =

366 std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);

367

368 auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());

369

370

371 unsigned I = 0;

372 for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;

373 (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {

374 if (!ReservedRegs.test(Reg) &&

375 MRI.isPhysRegUsed(Reg, true)) {

376 TRI->markSuperRegs(RegMask, Reg);

377 ++I;

378 }

379 }

380

381 if (I != NumRegs) {

382

383 TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);

385 "cannot find enough VGPRs for wwm-regalloc");

386 }

387}

388

389bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {

390 auto *LISWrapper = getAnalysisIfAvailable();

391 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

392 auto *SIWrapper = getAnalysisIfAvailable();

393 SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;

394 MachineDominatorTree *MDT =

395 &getAnalysis().getDomTree();

396 return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);

397}

398

399bool SILowerSGPRSpills::run(MachineFunction &MF) {

400 const GCNSubtarget &ST = MF.getSubtarget();

401 TII = ST.getInstrInfo();

403

405

406

407

408 calculateSaveRestoreBlocks(MF);

409 SmallVector CalleeSavedFIs;

410 bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);

411

414 SIMachineFunctionInfo *FuncInfo = MF.getInfo();

415

417 SaveBlocks.clear();

418 RestoreBlocks.clear();

419 return false;

420 }

421

422 bool MadeChange = false;

423 bool SpilledToVirtVGPRLanes = false;

424

425

426

427 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&

429 if (HasSGPRSpillToVGPR) {

430

431

432

433

434

435

436

438

439

440 DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;

441

442 for (MachineBasicBlock &MBB : MF) {

444 if (TII->isSGPRSpill(MI))

445 continue;

446

447 if (MI.getOperand(0).isUndef()) {

448 if (Indexes)

450 MI.eraseFromParent();

451 continue;

452 }

453

454 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();

456

458 if (IsCalleeSaveSGPRSpill) {

459

460

461

462

463

464

465

466

467

468

470 MF, FI, true)) {

471 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(

472 MI, FI, nullptr, Indexes, LIS, true);

473 if (!Spilled)

475 "failed to spill SGPR to physical VGPR lane when allocated");

476 }

477 } else {

478 MachineInstrSpan MIS(&MI, &MBB);

480 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(

481 MI, FI, nullptr, Indexes, LIS);

482 if (!Spilled)

484 "failed to spill SGPR to virtual VGPR lane when allocated");

485 SpillFIs.set(FI);

486 updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);

487 SpilledToVirtVGPRLanes = true;

488 }

489 }

490 }

491 }

492

494 auto InsertPt = LaneVGPRDomInstr[Reg];

495

496 MachineBasicBlock &Block = *InsertPt->getParent();

498 auto MIB =

500

501

503

504

506 if (LIS) {

509 }

510 }

511

512

513

515 BitVector WwmRegMask(TRI->getNumRegs());

516

517 determineRegsForWWMAllocation(MF, WwmRegMask);

518

519 BitVector NonWwmRegMask(WwmRegMask);

520 NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());

521

522

523

525 }

526

527 for (MachineBasicBlock &MBB : MF) {

528

529

530

531

532 for (MachineInstr &MI : MBB) {

533 if (MI.isDebugValue()) {

534 uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;

535 if (MI.getOperand(StackOperandIdx).isFI() &&

537 MI.getOperand(StackOperandIdx).getIndex()) &&

538 SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {

539 MI.getOperand(StackOperandIdx)

540 .ChangeToRegister(Register(), false );

541 }

542 }

543 }

544 }

545

546

547

548

549

550

552

553 MadeChange = true;

554 }

555

556 if (SpilledToVirtVGPRLanes) {

557 const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();

558

559

560

561 Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);

562 if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <

565 } else {

566

567

569 }

570

571 SaveBlocks.clear();

572 RestoreBlocks.clear();

573

574 return MadeChange;

575}

576

577PreservedAnalyses

584 SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);

586}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

Provides AMDGPU specific target descriptions.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

AMD GCN specific subclass of TargetSubtarget.

Register const TargetRegisterInfo * TRI

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

static void insertCSRRestores(MachineBasicBlock &RestoreBlock, std::vector< CalleeSavedInfo > &CSI)

Insert restore code for the callee-saved registers used in the function.

SmallVector< MachineBasicBlock *, 4 > MBBVector

static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI)

Insert spill code for the callee-saved registers used in the function.

static void updateLiveness(MachineFunction &MF)

Helper function to update the liveness information for the callee-saved registers.

This file declares the machine register scavenger class.

static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, const TargetRegisterInfo *TRI)

Definition SILowerSGPRSpills.cpp:103

static void insertCSRRestores(MachineBasicBlock &RestoreBlock, MutableArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)

Insert restore code for the callee-saved registers used in the function.

Definition SILowerSGPRSpills.cpp:162

static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)

Insert spill code for the callee-saved registers used in the function.

Definition SILowerSGPRSpills.cpp:114

PassT::Result * getCachedResult(IRUnitT &IR) const

Get the cached result of an analysis pass for a given IR unit.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.

bool test(unsigned Idx) const

The CalleeSavedInfo class tracks the information needed to locate where a callee-saved register is in the current frame.

iterator find(const_arg_type_t< KeyT > Val)

NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const

Find nearest common dominator basic block for basic block A and B.

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)

emitError - Emit an error message to the currently installed error handler with optional location information.

void removeAllRegUnitsForPhysReg(MCRegister Reg)

Remove associated live ranges for the register units associated with Reg.

SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)

LiveInterval & createAndComputeVirtRegInterval(Register Reg)

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

MCRegAliasIterator enumerates all registers aliasing Reg.

Wrapper class representing physical registers. Should be passed by value.

An RAII based helper class to modify MachineFunctionProperties when running pass.

bool isEHFuncletEntry() const

Returns true if this is the entry block of an EH funclet.

LLVM_ABI iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

bool isReturnBlock() const

Convenience function that returns true if the block ends in a return instruction.

LLVM_ABI void sortUniqueLiveIns()

Sorts and uniques the LiveIns vector.

LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)

Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.

void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())

Adds the specified register as a live in.

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

MachineInstrBundleIterator< MachineInstr > iterator

Analysis pass which computes a MachineDominatorTree.

LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &)

Analysis pass which computes a MachineDominatorTree.

DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

bool dominates(const MachineInstr *A, const MachineInstr *B) const

LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)

Create a new statically sized stack object, returning a nonnegative identifier to represent it.

void setCalleeSavedInfoValid(bool v)

int getObjectIndexEnd() const

Return one past the maximum frame object index.

bool hasStackObjects() const

Return true if there are any stack objects in this function.

uint8_t getStackID(int ObjectIdx) const

const SaveRestorePoints & getRestorePoints() const

bool isFixedObjectIndex(int ObjectIdx) const

Returns true if the specified index corresponds to a fixed stack object.

const SaveRestorePoints & getSavePoints() const

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

Properties which a MachineFunction may have at a given point in time.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do so.

const MachineBasicBlock & front() const

MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...

MachineBasicBlock::iterator begin()

Representation of each machine instruction.

MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

Wrapper class representing virtual and physical registers.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition SILowerSGPRSpills.cpp:578

void setSGPRForEXECCopy(Register Reg)

void setFlag(Register Reg, uint8_t Flag)

ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const

Register getSGPRForEXECCopy() const

bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)

bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)

If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.

void updateNonWWMRegMask(BitVector &RegMask)

bool hasSpilledSGPRs() const

ArrayRef< Register > getSGPRSpillVGPRs() const

SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)

Insert the given machine instruction into the mapping.

LLVM_ABI void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)

Removes machine instruction (bundle) MI from the mapping.

LLVM_ABI void repairIndexesInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End)

Repair indexes after adding and removing instructions.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Information about stack frame layout on the target.

void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const

virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const

spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...

virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const

restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...

TargetInstrInfo - Interface to description of machine instruction set.

const TargetRegisterInfo & getRegisterInfo() const

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.

virtual const TargetFrameLowering * getFrameLowering() const

virtual const TargetInstrInfo * getInstrInfo() const

virtual const TargetRegisterInfo * getRegisterInfo() const =0

Return the target's register information.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

auto reverse(ContainerTy &&C)

char & SILowerSGPRSpillsLegacyID

Definition SILowerSGPRSpills.cpp:101

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.

ArrayRef(const T &OneElt) -> ArrayRef< T >

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.