LLVM: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
24#include
25#include
26#include
27
29
30using namespace llvm;
31
32
33
34
35
37 "amdgpu-mfma-vgpr-form",
38 cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
39 "unspecified, default to compiler heuristics"),
42
47
49
53 UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
54 WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
55 PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
56 WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
57 GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0),
58 IsWholeWaveFunction(F.getCallingConv() ==
61 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
62 WavesPerEU = ST.getWavesPerEU(F);
63 MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
64 assert(MaxNumWorkGroups.size() == 3);
65
66
67
69 if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
70 DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();
71
72 Occupancy = ST.computeOccupancy(F, getLDSSize()).second;
74
75 VRegFlags.reserve(1024);
76
79
80 if (IsKernel) {
81 WorkGroupIDX = true;
82 WorkItemIDX = true;
85 }
86
87 if (ST.hasGFX90AInsts()) {
88
89
90 auto [MinNumAGPRAttr, MaxNumAGPRAttr] =
92 true);
93 MinNumAGPRs = MinNumAGPRAttr;
94 }
95
97
98
99
100
101 StackPtrOffsetReg = AMDGPU::SGPR32;
102
103 ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;
104
105 ArgInfo.PrivateSegmentBuffer =
107
108 ImplicitArgPtr = false;
113
114 FrameOffsetReg = AMDGPU::SGPR33;
115 StackPtrOffsetReg = AMDGPU::SGPR32;
116
117 if (!ST.enableFlatScratch()) {
118
119
120 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
121
122 ArgInfo.PrivateSegmentBuffer =
124 }
125
126 if (.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
127 ImplicitArgPtr = true;
128 } else {
129 ImplicitArgPtr = false;
131 std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
132 }
133
136 ST.hasArchitectedSGPRs())) {
137 if (IsKernel || .hasFnAttribute("amdgpu-no-workgroup-id-x") ||
138 .hasFnAttribute("amdgpu-no-cluster-id-x"))
139 WorkGroupIDX = true;
140
141 if (.hasFnAttribute("amdgpu-no-workgroup-id-y") ||
142 .hasFnAttribute("amdgpu-no-cluster-id-y"))
143 WorkGroupIDY = true;
144
145 if (.hasFnAttribute("amdgpu-no-workgroup-id-z") ||
146 .hasFnAttribute("amdgpu-no-cluster-id-z"))
147 WorkGroupIDZ = true;
148 }
149
151 if (IsKernel || .hasFnAttribute("amdgpu-no-workitem-id-x"))
152 WorkItemIDX = true;
153
154 if (.hasFnAttribute("amdgpu-no-workitem-id-y") &&
155 ST.getMaxWorkitemID(F, 1) != 0)
156 WorkItemIDY = true;
157
158 if (.hasFnAttribute("amdgpu-no-workitem-id-z") &&
159 ST.getMaxWorkitemID(F, 2) != 0)
160 WorkItemIDZ = true;
161
162 if (!IsKernel && .hasFnAttribute("amdgpu-no-lds-kernel-id"))
163 LDSKernelId = true;
164 }
165
167
168
169 if (WorkItemIDZ)
170 WorkItemIDY = true;
171
172 if (!ST.flatScratchIsArchitected()) {
173 PrivateSegmentWaveByteOffset = true;
174
175
178 ArgInfo.PrivateSegmentWaveByteOffset =
180 }
181 }
182
183 Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
187
188 A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
189 S = A.getValueAsString();
192
193 MaxMemoryClusterDWords = F.getFnAttributeAsParsedInteger(
195
196
197
198
199 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
200 VGPRForAGPRCopy =
201 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
202 }
203
205}
206
213
217 limitOccupancy(ST.getOccupancyWithWorkGroupSizes(MF).second);
218}
219
222 ArgInfo.PrivateSegmentBuffer =
224 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
225 NumUserSGPRs += 4;
226 return ArgInfo.PrivateSegmentBuffer.getRegister();
227}
228
231 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
232 NumUserSGPRs += 2;
233 return ArgInfo.DispatchPtr.getRegister();
234}
235
238 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
239 NumUserSGPRs += 2;
240 return ArgInfo.QueuePtr.getRegister();
241}
242
244 ArgInfo.KernargSegmentPtr
246 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
247 NumUserSGPRs += 2;
248 return ArgInfo.KernargSegmentPtr.getRegister();
249}
250
253 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
254 NumUserSGPRs += 2;
255 return ArgInfo.DispatchID.getRegister();
256}
257
260 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
261 NumUserSGPRs += 2;
262 return ArgInfo.FlatScratchInit.getRegister();
263}
264
267 NumUserSGPRs += 1;
268 return ArgInfo.PrivateSegmentSize.getRegister();
269}
270
273 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
274 NumUserSGPRs += 2;
275 return ArgInfo.ImplicitBufferPtr.getRegister();
276}
277
280 NumUserSGPRs += 1;
281 return ArgInfo.LDSKernelId.getRegister();
282}
283
286 unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
287 auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(KernArgIdx);
288 assert(Inserted && "Preload kernel argument allocated twice.");
289 NumUserSGPRs += PaddingSGPRs;
290
291
292
293 if (!ArgInfo.FirstKernArgPreloadReg)
294 ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
296 TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
297 auto &Regs = It->second.Regs;
298 if (PreloadReg &&
299 (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
300 Regs.push_back(PreloadReg);
301 NumUserSGPRs += AllocSizeDWord;
302 } else {
303 Regs.reserve(AllocSizeDWord);
304 for (unsigned I = 0; I < AllocSizeDWord; ++I) {
305 Regs.push_back(getNextUserSGPR());
306 NumUserSGPRs++;
307 }
308 }
309
310
311 UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
312 return &Regs;
313}
314
317
319 return;
320
321
322
323
324
325
326
327
328
329
333 return;
334
335 WWMSpills.insert(std::make_pair(
337}
338
339
342 SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
343 SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
345 for (auto &Reg : WWMSpills) {
347 CalleeSavedRegs.push_back(Reg);
348 else
349 ScratchRegs.push_back(Reg);
350 }
351}
352
355 for (unsigned I = 0; CSRegs[I]; ++I) {
356 if (CSRegs[I] == Reg)
357 return true;
358 }
359
360 return false;
361}
362
368 for (unsigned I = 0, E = WWMVGPRs.size(); I < E; ++I) {
371 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
372 if (!NewReg || NewReg >= Reg)
373 break;
374
375 MRI.replaceRegWith(Reg, NewReg);
376
377
378 WWMVGPRs[I] = NewReg;
379 WWMReservedRegs.remove(Reg);
380 WWMReservedRegs.insert(NewReg);
381 MRI.reserveReg(NewReg, TRI);
382
383
384
385 auto *RegItr = llvm::find(SpillPhysVGPRs, Reg);
386 if (RegItr != SpillPhysVGPRs.end()) {
387 unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);
388 SpillPhysVGPRs[Idx] = NewReg;
389 }
390
391
392
393 SavedVGPRs.reset(Reg);
394
396 MBB.removeLiveIn(Reg);
397 MBB.sortUniqueLiveIns();
398 }
399
400 Reg = NewReg;
401 }
402}
403
404bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
408 if (!LaneIndex) {
409 LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
410 SpillVGPRs.push_back(LaneVGPR);
411 } else {
412 LaneVGPR = SpillVGPRs.back();
413 }
414
415 SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
416 return true;
417}
418
419bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
420 MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
421 const GCNSubtarget &ST = MF.getSubtarget();
422 const SIRegisterInfo *TRI = ST.getRegisterInfo();
425 if (!LaneIndex) {
426
427
428
429 LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
430 !IsPrologEpilog);
431 if (LaneVGPR == AMDGPU::NoRegister) {
432
433
434 SGPRSpillsToPhysicalVGPRLanes.erase(FI);
435 return false;
436 }
437
438 if (IsPrologEpilog)
440
442 for (MachineBasicBlock &MBB : MF) {
445 }
446 SpillPhysVGPRs.push_back(LaneVGPR);
447 } else {
448 LaneVGPR = SpillPhysVGPRs.back();
449 }
450
451 SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
452 return true;
453}
454
457 bool IsPrologEpilog) {
458 std::vectorSIRegisterInfo::SpilledReg &SpillLanes =
459 SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
460 : SGPRSpillsToVirtualVGPRLanes[FI];
461
462
463 if (!SpillLanes.empty())
464 return true;
465
468 unsigned WaveSize = ST.getWavefrontSize();
469
470 unsigned Size = FrameInfo.getObjectSize(FI);
471 unsigned NumLanes = Size / 4;
472
473 if (NumLanes > WaveSize)
474 return false;
475
476 assert(Size >= 4 && "invalid sgpr spill size");
477 assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
478 "not spilling SGPRs to VGPRs");
479
480 unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
481 : NumVirtualVGPRSpillLanes;
482
483 for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
484 unsigned LaneIndex = (NumSpillLanes % WaveSize);
485
486 bool Allocated = SpillToPhysVGPRLane
487 ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
488 IsPrologEpilog)
489 : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
490 if (!Allocated) {
491 NumSpillLanes -= I;
492 return false;
493 }
494 }
495
496 return true;
497}
498
499
500
501
503 int FI,
504 bool isAGPRtoVGPR) {
508
510
511 auto &Spill = VGPRToAGPRSpills[FI];
512
513
514 if (!Spill.Lanes.empty())
515 return Spill.FullyAllocated;
516
518 unsigned NumLanes = Size / 4;
519 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
520
522 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
524
525 auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
527 Spill.FullyAllocated = true;
528
529
530
532 OtherUsedRegs.resize(TRI->getNumRegs());
533
536 if (CSRMask)
538
539
540
542 OtherUsedRegs.set(Reg);
544 OtherUsedRegs.set(Reg);
545
547 for (int I = NumLanes - 1; I >= 0; --I) {
548 NextSpillReg = std::find_if(
549 NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
550 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
551 !OtherUsedRegs[Reg];
552 });
553
554 if (NextSpillReg == Regs.end()) {
555 Spill.FullyAllocated = false;
556 break;
557 }
558
559 OtherUsedRegs.set(*NextSpillReg);
560 SpillRegs.push_back(*NextSpillReg);
561 MRI.reserveReg(*NextSpillReg, TRI);
562 Spill.Lanes[I] = *NextSpillReg++;
563 }
564
565 return Spill.FullyAllocated;
566}
567
570
571
572
573
574
575
578 SGPRSpillsToVirtualVGPRLanes.erase(R.first);
579 }
580
581
582
583 if (!ResetSGPRSpillStackIDs) {
586 SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
587 }
588 }
589 bool HaveSGPRToMemory = false;
590
591 if (ResetSGPRSpillStackIDs) {
592
593
595 ++I) {
599 HaveSGPRToMemory = true;
600 }
601 }
602 }
603 }
604
605 for (auto &R : VGPRToAGPRSpills) {
606 if (R.second.IsDead)
608 }
609
610 return HaveSGPRToMemory;
611}
612
615 if (ScavengeFI)
616 return *ScavengeFI;
617
618 ScavengeFI =
620 TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
621 return *ScavengeFI;
622}
623
/// Return the register that follows the user SGPRs counted so far, computed
/// as AMDGPU::SGPR0 offset by NumUserSGPRs.
///
/// Asserts that NumSystemSGPRs is still zero; per the assertion message,
/// system SGPRs must be added after user SGPRs.
624MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
625 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
626 return AMDGPU::SGPR0 + NumUserSGPRs;
627}
628
/// Return the register that follows both the user and the system SGPRs counted
/// so far, computed as AMDGPU::SGPR0 offset by NumUserSGPRs + NumSystemSGPRs.
629MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
630 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
631}
632
/// Grow VRegFlags for the register \p Reg.
///
/// NOTE(review): the name suggests this is a notification hook invoked when a
/// new virtual register is created, and that grow() makes VRegFlags indexable
/// by \p Reg -- confirm against the class declaration and VRegFlags' type.
633void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
634 VRegFlags.grow(Reg);
635}
636
637void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
639 VRegFlags.grow(NewReg);
640 VRegFlags[NewReg] = VRegFlags[SrcReg];
641}
642
646 if (!ST.isAmdPalOS())
648 Register GitPtrLo = AMDGPU::SGPR0;
649 if (ST.hasMergedShaders()) {
653
654
655 GitPtrLo = AMDGPU::SGPR8;
656 return GitPtrLo;
657 default:
658 return GitPtrLo;
659 }
660 }
661 return GitPtrLo;
662}
663
667 {
670 }
671 return Dest;
672}
673
674static std::optionalyaml::SIArgumentInfo
678
679 auto convertArg = [&](std::optionalyaml::SIArgument &A,
681 if (!Arg)
682 return false;
683
684
686 if (Arg.isRegister()) {
689 } else
691
692 if (Arg.isMasked())
693 SA.Mask = Arg.getMask();
694
695 A = SA;
696 return true;
697 };
698
699 bool Any = false;
713 ArgInfo.PrivateSegmentWaveByteOffset);
719
720
721
722 if (ArgInfo.FirstKernArgPreloadReg) {
725 "FirstKernArgPreloadReg must be a physical register");
726
730
732 Any = true;
733 }
734
736 return AI;
737
738 return std::nullopt;
739}
740
771
774
779
782
784 if (SFI)
786}
787
791
816
818
821 if (!FIOrErr) {
822
825
828 "", {}, {});
829 SourceRange = YamlMFI.ScavengeFI->SourceRange;
830 return true;
831 }
832 ScavengeFI = *FIOrErr;
833 } else {
834 ScavengeFI = std::nullopt;
835 }
836 return false;
837}
838
840 auto [MinNumAGPR, MaxNumAGPR] =
842 true);
843 return MinNumAGPR != 0u;
844}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static cl::opt< bool, true > MFMAVGPRFormOpt("amdgpu-mfma-vgpr-form", cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If " "unspecified, default to compiler heuristics"), cl::location(SIMachineFunctionInfo::MFMAVGPRForm), cl::init(false), cl::Hidden)
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
Definition SIMachineFunctionInfo.cpp:43
static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:675
@ MAX_LANES
Definition SIMachineFunctionInfo.cpp:28
static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:664
Interface definition for SIRegisterInfo.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST)
uint32_t getLDSSize() const
Align DynLDSAlign
Align for dynamic shared memory if any.
uint32_t LDSSize
Number of bytes in the LDS that are being used.
bool isChainFunction() const
uint64_t ExplicitKernArgSize
bool hasInitWholeWave() const
bool isEntryFunction() const
static ClusterDimsAttr get(const Function &F)
Functions, function parameters, and return types can have attributes to indicate how they should be t...
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsInMask - Add '1' bits from Mask to this vector.
Lightweight error class with error context and mandatory checking.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
const SITargetLowering * getTargetLowering() const override
LLVM_ABI void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
bool hasTailCall() const
Returns true if the function contains a tail call.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int getObjectIndexBegin() const
Return the minimum frame object index.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * cloneInfo(const Ty &Old)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
This interface provides simple read-only access to a block of memory, and provides simple methods for...
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition SIMachineFunctionInfo.cpp:792
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
Definition SIMachineFunctionInfo.cpp:363
Register addPrivateSegmentSize(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:265
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Definition SIMachineFunctionInfo.cpp:315
Register addDispatchPtr(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:229
Register getLongBranchReservedReg() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:258
unsigned getMaxWavesPerEU() const
ArrayRef< Register > getSGPRSpillPhysVGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:613
Register addQueuePtr(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:236
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
Register getGITPtrLoReg(const MachineFunction &MF) const
Definition SIMachineFunctionInfo.cpp:644
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
Definition SIMachineFunctionInfo.cpp:502
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Definition SIMachineFunctionInfo.cpp:340
Register getSGPRForEXECCopy() const
bool mayUseAGPRs(const Function &F) const
Definition SIMachineFunctionInfo.cpp:839
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
Definition SIMachineFunctionInfo.cpp:353
Register addLDSKernelId()
Definition SIMachineFunctionInfo.cpp:278
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Definition SIMachineFunctionInfo.cpp:455
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:243
Register addDispatchID(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:251
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
Definition SIMachineFunctionInfo.cpp:568
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
Definition SIMachineFunctionInfo.cpp:207
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:220
const ReservedRegSet & getWWMReservedRegs() const
std::optional< int > getOptionalScavengeFI() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
Definition SIMachineFunctionInfo.cpp:271
void limitOccupancy(const MachineFunction &MF)
Definition SIMachineFunctionInfo.cpp:214
SmallVectorImpl< MCRegister > * addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC, unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs)
Definition SIMachineFunctionInfo.cpp:284
void reserveWWMRegister(Register Reg)
static bool isChainScratchRegister(Register VGPR)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Represents a location in source code.
Represents a range in source code.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
unsigned getMainFileID() const
const MemoryBuffer * getMemoryBuffer(unsigned i) const
StringRef - Represent a constant reference to a string, i.e.
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
constexpr bool empty() const
empty - Check if the string is empty.
const TargetMachine & getTargetMachine() const
ArrayRef< MCPhysReg > getRegisters() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A raw_ostream that writes to an std::string.
unsigned getInitialPSInputAddr(const Function &F)
unsigned getDynamicVGPRBlockSize(const Function &F)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
constexpr unsigned DefaultMemoryClusterDWordsLimit
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
A serializable representation of a reference to a stack object or fixed stack object.
This class should be specialized by any type that needs to be converted to/from a YAML mapping.
std::optional< SIArgument > PrivateSegmentWaveByteOffset
std::optional< SIArgument > WorkGroupIDY
std::optional< SIArgument > FlatScratchInit
std::optional< SIArgument > DispatchPtr
std::optional< SIArgument > DispatchID
std::optional< SIArgument > WorkItemIDY
std::optional< SIArgument > WorkGroupIDX
std::optional< SIArgument > ImplicitArgPtr
std::optional< SIArgument > QueuePtr
std::optional< SIArgument > WorkGroupInfo
std::optional< SIArgument > LDSKernelId
std::optional< SIArgument > ImplicitBufferPtr
std::optional< SIArgument > WorkItemIDX
std::optional< SIArgument > KernargSegmentPtr
std::optional< SIArgument > WorkItemIDZ
std::optional< SIArgument > PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentBuffer
std::optional< SIArgument > FirstKernArgPreloadReg
std::optional< SIArgument > WorkGroupIDZ
std::optional< unsigned > Mask
static SIArgument createArgument(bool IsReg)
unsigned MaxMemoryClusterDWords
StringValue SGPRForEXECCopy
SmallVector< StringValue > WWMReservedRegs
uint32_t HighBitsOf32BitAddress
SIMachineFunctionInfo()=default
StringValue FrameOffsetReg
StringValue LongBranchReservedReg
unsigned NumKernargPreloadSGPRs
uint64_t ExplicitKernArgSize
uint16_t NumWaveDispatchSGPRs
void mappingImpl(yaml::IO &YamlIO) override
Definition SIMachineFunctionInfo.cpp:788
unsigned DynamicVGPRBlockSize
StringValue VGPRForAGPRCopy
std::optional< SIArgumentInfo > ArgInfo
SmallVector< StringValue, 2 > SpillPhysVGPRS
std::optional< FrameIndex > ScavengeFI
uint16_t NumWaveDispatchVGPRs
unsigned BytesInStackArgArea
unsigned ScratchReservedForDynamicVGPRs
StringValue ScratchRSrcReg
StringValue StackPtrOffsetReg
A wrapper around std::string which contains a source range that's being set during parsing.