LLVM: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

24#include

25#include

26#include

27

29

30using namespace llvm;

31

32

33

34

35

37 "amdgpu-mfma-vgpr-form",

38 cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "

39 "unspecified, default to compiler heuristics"),

42

47

49

53 UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),

54 WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),

55 PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),

56 WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),

57 GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0),

58 IsWholeWaveFunction(F.getCallingConv() ==

61 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);

62 WavesPerEU = ST.getWavesPerEU(F);

63 MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);

64 assert(MaxNumWorkGroups.size() == 3);

65

66

67

69 if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())

70 DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();

71

72 Occupancy = ST.computeOccupancy(F, getLDSSize()).second;

74

75 VRegFlags.reserve(1024);

76

79

80 if (IsKernel) {

81 WorkGroupIDX = true;

82 WorkItemIDX = true;

85 }

86

87 if (ST.hasGFX90AInsts()) {

88

89

90 auto [MinNumAGPRAttr, MaxNumAGPRAttr] =

92 true);

93 MinNumAGPRs = MinNumAGPRAttr;

94 }

95

97

98

99

100

101 StackPtrOffsetReg = AMDGPU::SGPR32;

102

103 ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

104

105 ArgInfo.PrivateSegmentBuffer =

107

108 ImplicitArgPtr = false;

113

114 FrameOffsetReg = AMDGPU::SGPR33;

115 StackPtrOffsetReg = AMDGPU::SGPR32;

116

117 if (!ST.enableFlatScratch()) {

118

119

120 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

121

122 ArgInfo.PrivateSegmentBuffer =

124 }

125

126 if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))

127 ImplicitArgPtr = true;

128 } else {

129 ImplicitArgPtr = false;

131 std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);

132 }

133

136 ST.hasArchitectedSGPRs())) {

137 if (IsKernel || F.hasFnAttribute("amdgpu-no-workgroup-id-x") ||

138 F.hasFnAttribute("amdgpu-no-cluster-id-x"))

139 WorkGroupIDX = true;

140

141 if (F.hasFnAttribute("amdgpu-no-workgroup-id-y") ||

142 F.hasFnAttribute("amdgpu-no-cluster-id-y"))

143 WorkGroupIDY = true;

144

145 if (F.hasFnAttribute("amdgpu-no-workgroup-id-z") ||

146 F.hasFnAttribute("amdgpu-no-cluster-id-z"))

147 WorkGroupIDZ = true;

148 }

149

151 if (IsKernel || F.hasFnAttribute("amdgpu-no-workitem-id-x"))

152 WorkItemIDX = true;

153

154 if (F.hasFnAttribute("amdgpu-no-workitem-id-y") &&

155 ST.getMaxWorkitemID(F, 1) != 0)

156 WorkItemIDY = true;

157

158 if (F.hasFnAttribute("amdgpu-no-workitem-id-z") &&

159 ST.getMaxWorkitemID(F, 2) != 0)

160 WorkItemIDZ = true;

161

162 if (!IsKernel && F.hasFnAttribute("amdgpu-no-lds-kernel-id"))

163 LDSKernelId = true;

164 }

165

167

168

169 if (WorkItemIDZ)

170 WorkItemIDY = true;

171

172 if (!ST.flatScratchIsArchitected()) {

173 PrivateSegmentWaveByteOffset = true;

174

175

178 ArgInfo.PrivateSegmentWaveByteOffset =

180 }

181 }

182

183 Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");

187

188 A = F.getFnAttribute("amdgpu-32bit-address-high-bits");

189 S = A.getValueAsString();

192

193 MaxMemoryClusterDWords = F.getFnAttributeAsParsedInteger(

195

196

197

198

199 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {

200 VGPRForAGPRCopy =

201 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);

202 }

203

205}

206

213

217 limitOccupancy(ST.getOccupancyWithWorkGroupSizes(MF).second);

218}

219

222 ArgInfo.PrivateSegmentBuffer =

224 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));

225 NumUserSGPRs += 4;

226 return ArgInfo.PrivateSegmentBuffer.getRegister();

227}

228

231 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

232 NumUserSGPRs += 2;

233 return ArgInfo.DispatchPtr.getRegister();

234}

235

238 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

239 NumUserSGPRs += 2;

240 return ArgInfo.QueuePtr.getRegister();

241}

242

244 ArgInfo.KernargSegmentPtr

246 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

247 NumUserSGPRs += 2;

248 return ArgInfo.KernargSegmentPtr.getRegister();

249}

250

253 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

254 NumUserSGPRs += 2;

255 return ArgInfo.DispatchID.getRegister();

256}

257

260 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

261 NumUserSGPRs += 2;

262 return ArgInfo.FlatScratchInit.getRegister();

263}

264

267 NumUserSGPRs += 1;

268 return ArgInfo.PrivateSegmentSize.getRegister();

269}

270

273 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

274 NumUserSGPRs += 2;

275 return ArgInfo.ImplicitBufferPtr.getRegister();

276}

277

280 NumUserSGPRs += 1;

281 return ArgInfo.LDSKernelId.getRegister();

282}

283

286 unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {

287 auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(KernArgIdx);

288 assert(Inserted && "Preload kernel argument allocated twice.");

289 NumUserSGPRs += PaddingSGPRs;

290

291

292

293 if (!ArgInfo.FirstKernArgPreloadReg)

294 ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();

296 TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);

297 auto &Regs = It->second.Regs;

298 if (PreloadReg &&

299 (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {

300 Regs.push_back(PreloadReg);

301 NumUserSGPRs += AllocSizeDWord;

302 } else {

303 Regs.reserve(AllocSizeDWord);

304 for (unsigned I = 0; I < AllocSizeDWord; ++I) {

305 Regs.push_back(getNextUserSGPR());

306 NumUserSGPRs++;

307 }

308 }

309

310

311 UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);

312 return &Regs;

313}

314

317

319 return;

320

321

322

323

324

325

326

327

328

329

333 return;

334

335 WWMSpills.insert(std::make_pair(

337}

338

339

342 SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,

343 SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {

345 for (auto &Reg : WWMSpills) {

347 CalleeSavedRegs.push_back(Reg);

348 else

349 ScratchRegs.push_back(Reg);

350 }

351}

352

355 for (unsigned I = 0; CSRegs[I]; ++I) {

356 if (CSRegs[I] == Reg)

357 return true;

358 }

359

360 return false;

361}

362

368 for (unsigned I = 0, E = WWMVGPRs.size(); I < E; ++I) {

371 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);

372 if (!NewReg || NewReg >= Reg)

373 break;

374

375 MRI.replaceRegWith(Reg, NewReg);

376

377

378 WWMVGPRs[I] = NewReg;

379 WWMReservedRegs.remove(Reg);

380 WWMReservedRegs.insert(NewReg);

381 MRI.reserveReg(NewReg, TRI);

382

383

384

385 auto *RegItr = llvm::find(SpillPhysVGPRs, Reg);

386 if (RegItr != SpillPhysVGPRs.end()) {

387 unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);

388 SpillPhysVGPRs[Idx] = NewReg;

389 }

390

391

392

393 SavedVGPRs.reset(Reg);

394

396 MBB.removeLiveIn(Reg);

397 MBB.sortUniqueLiveIns();

398 }

399

400 Reg = NewReg;

401 }

402}

403

404bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(

408 if (!LaneIndex) {

409 LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

410 SpillVGPRs.push_back(LaneVGPR);

411 } else {

412 LaneVGPR = SpillVGPRs.back();

413 }

414

415 SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);

416 return true;

417}

418

419bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(

420 MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {

421 const GCNSubtarget &ST = MF.getSubtarget();

422 const SIRegisterInfo *TRI = ST.getRegisterInfo();

425 if (!LaneIndex) {

426

427

428

429 LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,

430 !IsPrologEpilog);

431 if (LaneVGPR == AMDGPU::NoRegister) {

432

433

434 SGPRSpillsToPhysicalVGPRLanes.erase(FI);

435 return false;

436 }

437

438 if (IsPrologEpilog)

440

442 for (MachineBasicBlock &MBB : MF) {

445 }

446 SpillPhysVGPRs.push_back(LaneVGPR);

447 } else {

448 LaneVGPR = SpillPhysVGPRs.back();

449 }

450

451 SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);

452 return true;

453}

454

457 bool IsPrologEpilog) {

458 std::vectorSIRegisterInfo::SpilledReg &SpillLanes =

459 SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]

460 : SGPRSpillsToVirtualVGPRLanes[FI];

461

462

463 if (!SpillLanes.empty())

464 return true;

465

468 unsigned WaveSize = ST.getWavefrontSize();

469

470 unsigned Size = FrameInfo.getObjectSize(FI);

471 unsigned NumLanes = Size / 4;

472

473 if (NumLanes > WaveSize)

474 return false;

475

476 assert(Size >= 4 && "invalid sgpr spill size");

477 assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&

478 "not spilling SGPRs to VGPRs");

479

480 unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes

481 : NumVirtualVGPRSpillLanes;

482

483 for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {

484 unsigned LaneIndex = (NumSpillLanes % WaveSize);

485

486 bool Allocated = SpillToPhysVGPRLane

487 ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,

488 IsPrologEpilog)

489 : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);

490 if (!Allocated) {

491 NumSpillLanes -= I;

492 return false;

493 }

494 }

495

496 return true;

497}

498

499

500

501

503 int FI,

504 bool isAGPRtoVGPR) {

508

510

511 auto &Spill = VGPRToAGPRSpills[FI];

512

513

514 if (!Spill.Lanes.empty())

515 return Spill.FullyAllocated;

516

518 unsigned NumLanes = Size / 4;

519 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

520

522 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;

524

525 auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;

527 Spill.FullyAllocated = true;

528

529

530

532 OtherUsedRegs.resize(TRI->getNumRegs());

533

536 if (CSRMask)

538

539

540

542 OtherUsedRegs.set(Reg);

544 OtherUsedRegs.set(Reg);

545

547 for (int I = NumLanes - 1; I >= 0; --I) {

548 NextSpillReg = std::find_if(

549 NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {

550 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&

551 !OtherUsedRegs[Reg];

552 });

553

554 if (NextSpillReg == Regs.end()) {

555 Spill.FullyAllocated = false;

556 break;

557 }

558

559 OtherUsedRegs.set(*NextSpillReg);

560 SpillRegs.push_back(*NextSpillReg);

561 MRI.reserveReg(*NextSpillReg, TRI);

562 Spill.Lanes[I] = *NextSpillReg++;

563 }

564

565 return Spill.FullyAllocated;

566}

567

570

571

572

573

574

575

578 SGPRSpillsToVirtualVGPRLanes.erase(R.first);

579 }

580

581

582

583 if (!ResetSGPRSpillStackIDs) {

586 SGPRSpillsToPhysicalVGPRLanes.erase(R.first);

587 }

588 }

589 bool HaveSGPRToMemory = false;

590

591 if (ResetSGPRSpillStackIDs) {

592

593

595 ++I) {

599 HaveSGPRToMemory = true;

600 }

601 }

602 }

603 }

604

605 for (auto &R : VGPRToAGPRSpills) {

606 if (R.second.IsDead)

608 }

609

610 return HaveSGPRToMemory;

611}

612

615 if (ScavengeFI)

616 return *ScavengeFI;

617

618 ScavengeFI =

620 TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);

621 return *ScavengeFI;

622}

623

// Returns the next unallocated user SGPR. User SGPRs are handed out
// contiguously starting at SGPR0, so the next free one is
// SGPR0 + NumUserSGPRs. Must be called before any system SGPRs have
// been allocated (asserted below), since system SGPRs follow user SGPRs.
624MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {

625 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");

626 return AMDGPU::SGPR0 + NumUserSGPRs;

627}

628

// Returns the next unallocated system SGPR. System SGPRs are allocated
// immediately after the user SGPRs in the same contiguous range that
// starts at SGPR0.
629MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {

630 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;

631}

632

// MachineRegisterInfo delegate callback: a new virtual register Reg was
// created, so grow the per-virtual-register flag table to cover it.
633void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {

634 VRegFlags.grow(Reg);

635}

636

637void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,

639 VRegFlags.grow(NewReg);

640 VRegFlags[NewReg] = VRegFlags[SrcReg];

641}

642

646 if (!ST.isAmdPalOS())

648 Register GitPtrLo = AMDGPU::SGPR0;

649 if (ST.hasMergedShaders()) {

653

654

655 GitPtrLo = AMDGPU::SGPR8;

656 return GitPtrLo;

657 default:

658 return GitPtrLo;

659 }

660 }

661 return GitPtrLo;

662}

663

667 {

670 }

671 return Dest;

672}

673

674static std::optionalyaml::SIArgumentInfo

678

679 auto convertArg = [&](std::optionalyaml::SIArgument &A,

681 if (!Arg)

682 return false;

683

684

686 if (Arg.isRegister()) {

689 } else

691

692 if (Arg.isMasked())

693 SA.Mask = Arg.getMask();

694

695 A = SA;

696 return true;

697 };

698

699 bool Any = false;

713 ArgInfo.PrivateSegmentWaveByteOffset);

719

720

721

722 if (ArgInfo.FirstKernArgPreloadReg) {

725 "FirstKernArgPreloadReg must be a physical register");

726

730

732 Any = true;

733 }

734

736 return AI;

737

738 return std::nullopt;

739}

740

771

774

779

782

784 if (SFI)

786}

787

791

816

818

821 if (!FIOrErr) {

822

825

828 "", {}, {});

829 SourceRange = YamlMFI.ScavengeFI->SourceRange;

830 return true;

831 }

832 ScavengeFI = *FIOrErr;

833 } else {

834 ScavengeFI = std::nullopt;

835 }

836 return false;

837}

838

840 auto [MinNumAGPR, MaxNumAGPR] =

842 true);

843 return MinNumAGPR != 0u;

844}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

Provides AMDGPU specific target descriptions.

Base class for AMDGPU specific classes of TargetSubtarget.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

AMD GCN specific subclass of TargetSubtarget.

Register const TargetRegisterInfo * TRI

Promote Memory to Register

static cl::opt< bool, true > MFMAVGPRFormOpt("amdgpu-mfma-vgpr-form", cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If " "unspecified, default to compiler heuristics"), cl::location(SIMachineFunctionInfo::MFMAVGPRForm), cl::init(false), cl::Hidden)

const GCNTargetMachine & getTM(const GCNSubtarget *STI)

Definition SIMachineFunctionInfo.cpp:43

static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:675

@ MAX_LANES

Definition SIMachineFunctionInfo.cpp:28

static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:664

Interface definition for SIRegisterInfo.

static const AMDGPUFunctionArgInfo FixedABIFunctionInfo

AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST)

uint32_t getLDSSize() const

Align DynLDSAlign

Align for dynamic shared memory if any.

uint32_t LDSSize

Number of bytes in the LDS that are being used.

bool isChainFunction() const

uint64_t ExplicitKernArgSize

bool hasInitWholeWave() const

bool isEntryFunction() const

static ClusterDimsAttr get(const Function &F)

Functions, function parameters, and return types can have attributes to indicate how they should be t...

void resize(unsigned N, bool t=false)

resize - Grow or shrink the bitvector.

void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)

setBitsInMask - Add '1' bits from Mask to this vector.

Lightweight error class with error context and mandatory checking.

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

const SITargetLowering * getTargetLowering() const override

LLVM_ABI void sortUniqueLiveIns()

Sorts and uniques the LiveIns vector.

void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())

Adds the specified register as a live in.

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)

Create a new statically sized stack object, returning a nonnegative identifier to represent it.

LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)

Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...

void setStackID(int ObjectIdx, uint8_t ID)

bool hasTailCall() const

Returns true if the function contains a tail call.

bool isSpillSlotObjectIndex(int ObjectIdx) const

Returns true if the specified index corresponds to a spill slot.

int64_t getObjectSize(int ObjectIdx) const

Return the size of the specified object.

void RemoveStackObject(int ObjectIdx)

Remove or mark dead a statically sized stack object.

int getObjectIndexEnd() const

Return one past the maximum frame object index.

uint8_t getStackID(int ObjectIdx) const

int getObjectIndexBegin() const

Return the minimum frame object index.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * cloneInfo(const Ty &Old)

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const

Returns list of callee saved registers.

This interface provides simple read-only access to a block of memory, and provides simple methods for...

virtual StringRef getBufferIdentifier() const

Return an identifier for this buffer, typically the filename it was read from.

Wrapper class representing virtual and physical registers.

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)

Definition SIMachineFunctionInfo.cpp:792

void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)

Definition SIMachineFunctionInfo.cpp:363

Register addPrivateSegmentSize(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:265

void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))

Definition SIMachineFunctionInfo.cpp:315

Register addDispatchPtr(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:229

Register getLongBranchReservedReg() const

Register addFlatScratchInit(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:258

unsigned getMaxWavesPerEU() const

ArrayRef< Register > getSGPRSpillPhysVGPRs() const

int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:613

Register addQueuePtr(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:236

SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default

Register getGITPtrLoReg(const MachineFunction &MF) const

Definition SIMachineFunctionInfo.cpp:644

bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)

Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.

Definition SIMachineFunctionInfo.cpp:502

void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const

Definition SIMachineFunctionInfo.cpp:340

Register getSGPRForEXECCopy() const

bool mayUseAGPRs(const Function &F) const

Definition SIMachineFunctionInfo.cpp:839

bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const

Definition SIMachineFunctionInfo.cpp:353

Register addLDSKernelId()

Definition SIMachineFunctionInfo.cpp:278

Register getVGPRForAGPRCopy() const

bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)

Definition SIMachineFunctionInfo.cpp:455

Register addKernargSegmentPtr(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:243

Register addDispatchID(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:251

bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)

If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.

Definition SIMachineFunctionInfo.cpp:568

MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override

Make a functionally equivalent copy of this MachineFunctionInfo in MF.

Definition SIMachineFunctionInfo.cpp:207

bool checkIndexInPrologEpilogSGPRSpills(int FI) const

Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:220

const ReservedRegSet & getWWMReservedRegs() const

std::optional< int > getOptionalScavengeFI() const

Register addImplicitBufferPtr(const SIRegisterInfo &TRI)

Definition SIMachineFunctionInfo.cpp:271

void limitOccupancy(const MachineFunction &MF)

Definition SIMachineFunctionInfo.cpp:214

SmallVectorImpl< MCRegister > * addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC, unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs)

Definition SIMachineFunctionInfo.cpp:284

void reserveWWMRegister(Register Reg)

static bool isChainScratchRegister(Register VGPR)

Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...

Represents a location in source code.

Represents a range in source code.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

typename SuperClass::const_iterator const_iterator

unsigned getMainFileID() const

const MemoryBuffer * getMemoryBuffer(unsigned i) const

StringRef - Represent a constant reference to a string, i.e.

bool consumeInteger(unsigned Radix, T &Result)

Parse the current string as an integer of the specified radix.

constexpr bool empty() const

empty - Check if the string is empty.

const TargetMachine & getTargetMachine() const

ArrayRef< MCPhysReg > getRegisters() const

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

A raw_ostream that writes to an std::string.

unsigned getInitialPSInputAddr(const Function &F)

unsigned getDynamicVGPRBlockSize(const Function &F)

LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)

std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)

LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)

CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ AMDGPU_CS

Used for Mesa/AMDPAL compute shaders.

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ AMDGPU_Gfx

Used for AMD graphics targets.

@ AMDGPU_HS

Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).

@ AMDGPU_GS

Used for Mesa/AMDPAL geometry shaders.

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

@ SPIR_KERNEL

Used for SPIR kernel functions.

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

This is an optimization pass for GlobalISel generic memory operations.

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...

std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)

constexpr unsigned DefaultMemoryClusterDWordsLimit

BumpPtrAllocatorImpl<> BumpPtrAllocator

The standard BumpPtrAllocator which just uses the default template parameters.

LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

This struct is a compact representation of a valid (non-zero power of two) alignment.

static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)

Helper struct shared between Function Specialization and SCCP Solver.

MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...

A serializable representation of a reference to a stack object or fixed stack object.

This class should be specialized by any type that needs to be converted to/from a YAML mapping.

std::optional< SIArgument > PrivateSegmentWaveByteOffset

std::optional< SIArgument > WorkGroupIDY

std::optional< SIArgument > FlatScratchInit

std::optional< SIArgument > DispatchPtr

std::optional< SIArgument > DispatchID

std::optional< SIArgument > WorkItemIDY

std::optional< SIArgument > WorkGroupIDX

std::optional< SIArgument > ImplicitArgPtr

std::optional< SIArgument > QueuePtr

std::optional< SIArgument > WorkGroupInfo

std::optional< SIArgument > LDSKernelId

std::optional< SIArgument > ImplicitBufferPtr

std::optional< SIArgument > WorkItemIDX

std::optional< SIArgument > KernargSegmentPtr

std::optional< SIArgument > WorkItemIDZ

std::optional< SIArgument > PrivateSegmentSize

std::optional< SIArgument > PrivateSegmentBuffer

std::optional< SIArgument > FirstKernArgPreloadReg

std::optional< SIArgument > WorkGroupIDZ

std::optional< unsigned > Mask

static SIArgument createArgument(bool IsReg)

unsigned MaxMemoryClusterDWords

StringValue SGPRForEXECCopy

SmallVector< StringValue > WWMReservedRegs

uint32_t HighBitsOf32BitAddress

SIMachineFunctionInfo()=default

StringValue FrameOffsetReg

StringValue LongBranchReservedReg

unsigned NumKernargPreloadSGPRs

uint64_t ExplicitKernArgSize

uint16_t NumWaveDispatchSGPRs

void mappingImpl(yaml::IO &YamlIO) override

Definition SIMachineFunctionInfo.cpp:788

unsigned DynamicVGPRBlockSize

StringValue VGPRForAGPRCopy

std::optional< SIArgumentInfo > ArgInfo

SmallVector< StringValue, 2 > SpillPhysVGPRS

std::optional< FrameIndex > ScavengeFI

uint16_t NumWaveDispatchVGPRs

unsigned BytesInStackArgArea

unsigned ScratchReservedForDynamicVGPRs

StringValue ScratchRSrcReg

StringValue StackPtrOffsetReg

A wrapper around std::string which contains a source range that's being set during parsing.