LLVM: llvm::AMDGPUSubtarget Class Reference (original) (raw)
#include "[Target/AMDGPU/AMDGPUSubtarget.h](AMDGPUSubtarget%5F8h%5Fsource.html)"
Public Types
enum
Generation { INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 , NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 , GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 }
Public Member Functions
AMDGPUSubtarget (Triple TT)
std::pair< unsigned , unsigned >
getDefaultFlatWorkGroupSize (CallingConv::ID CC) const
std::pair< unsigned , unsigned >
getFlatWorkGroupSizes (const Function &F ) const
std::optional< unsigned >
getReqdWorkGroupSize (const Function &F , unsigned Dim) const
bool
hasWavefrontsEvenlySplittingXDim (const Function &F , bool REquiresUniformYZ=false) const
std::pair< unsigned , unsigned >
getWavesPerEU (const Function &F ) const
std::pair< unsigned , unsigned >
getWavesPerEU (const Function &F , std::pair< unsigned , unsigned > FlatWorkGroupSizes) const
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned , unsigned >
getWavesPerEU (std::pair< unsigned , unsigned > FlatWorkGroupSizes, unsigned LDSBytes, const Function &F ) const
Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.
std::pair< unsigned , unsigned >
getEffectiveWavesPerEU (std::pair< unsigned , unsigned > RequestedWavesPerEU, std::pair< unsigned , unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
unsigned
getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
std::pair< unsigned , unsigned >
getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F ) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.
std::pair< unsigned , unsigned >
getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, std::pair< unsigned , unsigned > FlatWorkGroupSizes) const
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned , unsigned >
getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.
bool
isAmdHsaOS () const
bool
isAmdPalOS () const
bool
isMesa3DOS () const
bool
isMesaKernel (const Function &F ) const
bool
isAmdHsaOrMesa (const Function &F ) const
bool
isGCN () const
bool
isGCN3Encoding () const
bool
has16BitInsts () const
bool
hasTrue16BitInsts () const
Return true if the subtarget supports True16 instructions.
bool
useRealTrue16Insts () const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool
hasD16Writes32BitVgpr () const
bool
hasBF16TransInsts () const
bool
hasBF16ConversionInsts () const
bool
hasBF16PackedInsts () const
bool
hasMadMixInsts () const
bool
hasFP8ConversionScaleInsts () const
bool
hasBF8ConversionScaleInsts () const
bool
hasFP4ConversionScaleInsts () const
bool
hasFP6BF6ConversionScaleInsts () const
bool
hasF16BF16ToFP6BF6ConversionScaleInsts () const
bool
hasCvtPkF16F32Inst () const
bool
hasF32ToF16BF16ConversionSRInsts () const
bool
hasMadMacF32Insts () const
bool
hasDsSrc2Insts () const
bool
hasSDWA () const
bool
hasVOP3PInsts () const
bool
hasMulI24 () const
bool
hasMulU24 () const
bool
hasSMulHi () const
bool
hasInv2PiInlineImm () const
bool
hasFminFmaxLegacy () const
bool
hasTrigReducedRange () const
bool
hasFastFMAF32 () const
bool
isPromoteAllocaEnabled () const
unsigned
getWavefrontSize () const
unsigned
getWavefrontSizeLog2 () const
unsigned
getLocalMemorySize () const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned
getAddressableLocalMemorySize () const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned
getEUsPerCU () const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
Align
getAlignmentForImplicitArgPtr () const
unsigned
getExplicitKernelArgOffset () const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
virtual unsigned
getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0
virtual unsigned
getMinFlatWorkGroupSize () const =0
virtual unsigned
getMaxFlatWorkGroupSize () const =0
virtual unsigned
getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0
virtual unsigned
getMinWavesPerEU () const =0
unsigned
getMaxWavesPerEU () const
unsigned
getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
SmallVector < unsigned >
getMaxNumWorkGroups (const Function &F ) const
Return the number of work groups for the function.
bool
isSingleLaneExecution (const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
bool
makeLIDRangeMetadata (Instruction *I ) const
Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned
getImplicitArgNumBytes (const Function &F ) const
uint64_t
getExplicitKernArgSize (const Function &F , Align &MaxAlign) const
unsigned
getKernArgSegmentSize (const Function &F , Align &MaxAlign) const
AMDGPUDwarfFlavour
getAMDGPUDwarfFlavour () const
virtual
~AMDGPUSubtarget ()=default
Definition at line 30 of file AMDGPUSubtarget.h .
◆ Generation
Enumerator
INVALID
R600
R700
EVERGREEN
NORTHERN_ISLANDS
SOUTHERN_ISLANDS
SEA_ISLANDS
VOLCANIC_ISLANDS
GFX9
GFX10
GFX11
GFX12
Definition at line 32 of file AMDGPUSubtarget.h .
AMDGPUSubtarget::AMDGPUSubtarget
(
Triple
TT
)
◆ ~AMDGPUSubtarget()
virtual llvm::AMDGPUSubtarget::~AMDGPUSubtarget ( )
virtual default
◆ get() [1/2]◆ get() [2/2]◆ getAddressableLocalMemorySize()
unsigned llvm::AMDGPUSubtarget::getAddressableLocalMemorySize ( ) const
inline
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
For GFX10-GFX12 in WGP mode this is limited to 64k even though the WGP has 128k in total.
Definition at line 330 of file AMDGPUSubtarget.h .
References AddressableLocalMemorySize .
◆ getAlignmentForImplicitArgPtr()
Align llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr ( ) const
inline
◆ getAMDGPUDwarfFlavour()◆ getDefaultFlatWorkGroupSize()◆ getEffectiveWavesPerEU()◆ getEUsPerCU()
unsigned llvm::AMDGPUSubtarget::getEUsPerCU ( ) const
inline
◆ getExplicitKernArgSize()◆ getExplicitKernelArgOffset()
unsigned llvm::AMDGPUSubtarget::getExplicitKernelArgOffset ( ) const
inline
◆ getFlatWorkGroupSizes()
Returns
Subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using "amdgpu-flat-work-group-size" attribute attached to function F.
Subtarget's default values if explicitly requested values cannot be converted to integer, or violate subtarget's specifications.
Definition at line 163 of file AMDGPUSubtarget.cpp .
References llvm::Default , F , getDefaultFlatWorkGroupSize() , llvm::AMDGPU::getIntegerPairAttribute() , getMaxFlatWorkGroupSize() , and getMinFlatWorkGroupSize() .
Referenced by getMaxLocalMemSizeWithWaveCount() , getMaxWorkitemID() , getOccupancyWithWorkGroupSizes() , getWavesPerEU() , and makeLIDRangeMetadata() .
◆ getImplicitArgNumBytes()◆ getKernArgSegmentSize()◆ getLocalMemorySize()
unsigned llvm::AMDGPUSubtarget::getLocalMemorySize ( ) const
inline
◆ getMaxFlatWorkGroupSize()
virtual unsigned llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize ( ) const
pure virtual
◆ getMaxLocalMemSizeWithWaveCount()◆ getMaxNumWorkGroups()◆ getMaxWavesPerEU()
unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const
inline
◆ getMaxWorkGroupsPerCU()
virtual unsigned llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize ) const
pure virtual
◆ getMaxWorkitemID()◆ getMinFlatWorkGroupSize()
virtual unsigned llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize ( ) const
pure virtual
◆ getMinWavesPerEU()
virtual unsigned llvm::AMDGPUSubtarget::getMinWavesPerEU ( ) const
pure virtual
◆ getOccupancyWithWorkGroupSizes() [1/3]◆ getOccupancyWithWorkGroupSizes() [2/3]◆ getOccupancyWithWorkGroupSizes() [3/3]◆ getReqdWorkGroupSize()◆ getWavefrontSize()
unsigned llvm::AMDGPUSubtarget::getWavefrontSize ( ) const
inline
◆ getWavefrontSizeLog2()
unsigned llvm::AMDGPUSubtarget::getWavefrontSizeLog2 ( ) const
inline
◆ getWavesPerEU() [1/3]
Returns
Subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using "amdgpu-waves-per-eu" attribute attached to function F.
Subtarget's default values if explicitly requested values cannot be converted to integer, violate subtarget's specifications, or are not compatible with minimum/maximum number of waves limited by flat work group size, register usage, and/or lds usage.
Definition at line 213 of file AMDGPUSubtarget.cpp .
References F , getFlatWorkGroupSizes() , llvm::AMDGPU::getIntegerPairAttribute() , and getWavesPerEU() .
Referenced by llvm::GCNSubtarget::getMaxNumSGPRs() , llvm::GCNSubtarget::getMaxNumVGPRs() , and getWavesPerEU() .
◆ getWavesPerEU() [2/3]
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
FlatWorkGroupSizes Should correspond to the function's value for getFlatWorkGroupSizes.
References F .
◆ getWavesPerEU() [3/3]◆ getWavesPerEUForWorkGroup()
virtual unsigned llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize ) const
pure virtual
◆ has16BitInsts()
bool llvm::AMDGPUSubtarget::has16BitInsts ( ) const
inline
◆ hasBF16ConversionInsts()
bool llvm::AMDGPUSubtarget::hasBF16ConversionInsts ( ) const
inline
◆ hasBF16PackedInsts()
bool llvm::AMDGPUSubtarget::hasBF16PackedInsts ( ) const
inline
◆ hasBF16TransInsts()
bool llvm::AMDGPUSubtarget::hasBF16TransInsts ( ) const
inline
◆ hasBF8ConversionScaleInsts()
bool llvm::AMDGPUSubtarget::hasBF8ConversionScaleInsts ( ) const
inline
◆ hasCvtPkF16F32Inst()
bool llvm::AMDGPUSubtarget::hasCvtPkF16F32Inst ( ) const
inline
◆ hasD16Writes32BitVgpr()
bool AMDGPUSubtarget::hasD16Writes32BitVgpr
(
)
const
◆ hasDsSrc2Insts()
bool llvm::AMDGPUSubtarget::hasDsSrc2Insts ( ) const
inline
◆ hasF16BF16ToFP6BF6ConversionScaleInsts()
bool llvm::AMDGPUSubtarget::hasF16BF16ToFP6BF6ConversionScaleInsts ( ) const
inline
◆ hasF32ToF16BF16ConversionSRInsts()
bool llvm::AMDGPUSubtarget::hasF32ToF16BF16ConversionSRInsts ( ) const
inline
◆ hasFastFMAF32()
bool llvm::AMDGPUSubtarget::hasFastFMAF32 ( ) const
inline
◆ hasFminFmaxLegacy()
bool llvm::AMDGPUSubtarget::hasFminFmaxLegacy ( ) const
inline
◆ hasFP4ConversionScaleInsts()
bool llvm::AMDGPUSubtarget::hasFP4ConversionScaleInsts ( ) const
inline
◆ hasFP6BF6ConversionScaleInsts()
bool llvm::AMDGPUSubtarget::hasFP6BF6ConversionScaleInsts ( ) const
inline
◆ hasFP8ConversionScaleInsts()
bool llvm::AMDGPUSubtarget::hasFP8ConversionScaleInsts ( ) const
inline
◆ hasInv2PiInlineImm()
bool llvm::AMDGPUSubtarget::hasInv2PiInlineImm ( ) const
inline
◆ hasMadMacF32Insts()
bool llvm::AMDGPUSubtarget::hasMadMacF32Insts ( ) const
inline
◆ hasMadMixInsts()
bool llvm::AMDGPUSubtarget::hasMadMixInsts ( ) const
inline
◆ hasMulI24()
bool llvm::AMDGPUSubtarget::hasMulI24 ( ) const
inline
◆ hasMulU24()
bool llvm::AMDGPUSubtarget::hasMulU24 ( ) const
inline
◆ hasSDWA()
bool llvm::AMDGPUSubtarget::hasSDWA ( ) const
inline
◆ hasSMulHi()
bool llvm::AMDGPUSubtarget::hasSMulHi ( ) const
inline
◆ hasTrigReducedRange()
bool llvm::AMDGPUSubtarget::hasTrigReducedRange ( ) const
inline
◆ hasTrue16BitInsts()
bool llvm::AMDGPUSubtarget::hasTrue16BitInsts ( ) const
inline
◆ hasVOP3PInsts()
bool llvm::AMDGPUSubtarget::hasVOP3PInsts ( ) const
inline
◆ hasWavefrontsEvenlySplittingXDim()
bool AMDGPUSubtarget::hasWavefrontsEvenlySplittingXDim
(
const Function &
F ,
bool
REquiresUniformYZ = false ) const
Returns
true if F will execute in a manner that leaves the X dimensions of the workitem ID evenly tiling wavefronts - that is, if X / wavefrontsize is uniform. This is true if either the Y and Z block dimensions are known to always be 1 or if the X dimension will always be a power of 2. If RequiresUniformYZ is true, it also ensures that the Y and Z workitem IDs will be uniform (so, while a (32, 2, 1) launch with wavesize64 would ordinarily pass this test, it won't with RequiresUniformYZ).
This information is currently only gathered from the !reqd_work_group_size metadata on F, but this may be improved in the future.
Definition at line 245 of file AMDGPUSubtarget.cpp .
References llvm::mdconst::extract() , F , getWavefrontSize() , and llvm::isPowerOf2_32() .
◆ isAmdHsaOrMesa()◆ isAmdHsaOS()
bool llvm::AMDGPUSubtarget::isAmdHsaOS ( ) const
inline
◆ isAmdPalOS()
bool llvm::AMDGPUSubtarget::isAmdPalOS ( ) const
inline
◆ isGCN()
bool llvm::AMDGPUSubtarget::isGCN ( ) const
inline
◆ isGCN3Encoding()
bool llvm::AMDGPUSubtarget::isGCN3Encoding ( ) const
inline
◆ isMesa3DOS()
bool llvm::AMDGPUSubtarget::isMesa3DOS ( ) const
inline
◆ isMesaKernel()◆ isPromoteAllocaEnabled()
bool llvm::AMDGPUSubtarget::isPromoteAllocaEnabled ( ) const
inline
◆ isSingleLaneExecution()◆ useRealTrue16Insts()
bool AMDGPUSubtarget::useRealTrue16Insts
(
)
const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.
Definition at line 37 of file AMDGPUSubtarget.cpp .
References EnableRealTrue16Insts , and hasTrue16BitInsts() .
◆ AddressableLocalMemorySize
unsigned llvm::AMDGPUSubtarget::AddressableLocalMemorySize = 0
protected
◆ EnableD16Writes32BitVgpr
bool llvm::AMDGPUSubtarget::EnableD16Writes32BitVgpr = false
protected
◆ EnablePromoteAlloca
bool llvm::AMDGPUSubtarget::EnablePromoteAlloca = false
protected
◆ EnableRealTrue16Insts
bool llvm::AMDGPUSubtarget::EnableRealTrue16Insts = false
protected
◆ EUsPerCU
unsigned llvm::AMDGPUSubtarget::EUsPerCU = 4
protected
◆ FastFMAF32
bool llvm::AMDGPUSubtarget::FastFMAF32 = false
protected
◆ GCN3Encoding
bool llvm::AMDGPUSubtarget::GCN3Encoding = false
protected
◆ Has16BitInsts
bool llvm::AMDGPUSubtarget::Has16BitInsts = false
protected
◆ HasBF16ConversionInsts
bool llvm::AMDGPUSubtarget::HasBF16ConversionInsts = false
protected
◆ HasBF16PackedInsts
bool llvm::AMDGPUSubtarget::HasBF16PackedInsts = false
protected
◆ HasBF16TransInsts
bool llvm::AMDGPUSubtarget::HasBF16TransInsts = false
protected
◆ HasBF8ConversionScaleInsts
bool llvm::AMDGPUSubtarget::HasBF8ConversionScaleInsts = false
protected
◆ HasCvtPkF16F32Inst
bool llvm::AMDGPUSubtarget::HasCvtPkF16F32Inst = false
protected
◆ HasDsSrc2Insts
bool llvm::AMDGPUSubtarget::HasDsSrc2Insts = false
protected
◆ HasF16BF16ToFP6BF6ConversionScaleInsts
bool llvm::AMDGPUSubtarget::HasF16BF16ToFP6BF6ConversionScaleInsts = false
protected
◆ HasF32ToF16BF16ConversionSRInsts
bool llvm::AMDGPUSubtarget::HasF32ToF16BF16ConversionSRInsts = false
protected
◆ HasFminFmaxLegacy
bool llvm::AMDGPUSubtarget::HasFminFmaxLegacy = true
protected
◆ HasFP4ConversionScaleInsts
bool llvm::AMDGPUSubtarget::HasFP4ConversionScaleInsts = false
protected
◆ HasFP6BF6ConversionScaleInsts
bool llvm::AMDGPUSubtarget::HasFP6BF6ConversionScaleInsts = false
protected
◆ HasFP8ConversionScaleInsts
bool llvm::AMDGPUSubtarget::HasFP8ConversionScaleInsts = false
protected
◆ HasInv2PiInlineImm
bool llvm::AMDGPUSubtarget::HasInv2PiInlineImm = false
protected
◆ HasMadMacF32Insts
bool llvm::AMDGPUSubtarget::HasMadMacF32Insts = false
protected
◆ HasMadMixInsts
bool llvm::AMDGPUSubtarget::HasMadMixInsts = false
protected
◆ HasMulI24
bool llvm::AMDGPUSubtarget::HasMulI24 = true
protected
◆ HasMulU24
bool llvm::AMDGPUSubtarget::HasMulU24 = true
protected
◆ HasSDWA
bool llvm::AMDGPUSubtarget::HasSDWA = false
protected
◆ HasSMulHi
bool llvm::AMDGPUSubtarget::HasSMulHi = false
protected
◆ HasTrigReducedRange
bool llvm::AMDGPUSubtarget::HasTrigReducedRange = false
protected
◆ HasTrue16BitInsts
bool llvm::AMDGPUSubtarget::HasTrue16BitInsts = false
protected
◆ HasVOP3PInsts
bool llvm::AMDGPUSubtarget::HasVOP3PInsts = false
protected
◆ LocalMemorySize
unsigned llvm::AMDGPUSubtarget::LocalMemorySize = 0
protected
◆ MaxWavesPerEU
unsigned llvm::AMDGPUSubtarget::MaxWavesPerEU = 10
protected
◆ WavefrontSizeLog2
char llvm::AMDGPUSubtarget::WavefrontSizeLog2 = 0
protected
The documentation for this class was generated from the following files: