LLVM: llvm::AMDGPUSubtarget Class Reference (original) (raw)

#include "[Target/AMDGPU/AMDGPUSubtarget.h](AMDGPUSubtarget%5F8h%5Fsource.html)"

Public Types
enum Generation { INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 , NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 , GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 }
Public Member Functions
AMDGPUSubtarget (Triple TT)
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize (CallingConv::ID CC) const
std::pair< unsigned, unsigned > getFlatWorkGroupSizes (const Function &F) const
std::optional< unsigned > getReqdWorkGroupSize (const Function &F, unsigned Dim) const
bool hasWavefrontsEvenlySplittingXDim (const Function &F, bool REquiresUniformYZ=false) const
std::pair< unsigned, unsigned > getWavesPerEU (const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU (const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned > getWavesPerEU (std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes, const Function &F) const
Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.
std::pair< unsigned, unsigned > getEffectiveWavesPerEU (std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
unsigned getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.
bool isAmdHsaOS () const
bool isAmdPalOS () const
bool isMesa3DOS () const
bool isMesaKernel (const Function &F) const
bool isAmdHsaOrMesa (const Function &F) const
bool isGCN () const
bool isGCN3Encoding () const
bool has16BitInsts () const
bool hasTrue16BitInsts () const
Return true if the subtarget supports True16 instructions.
bool useRealTrue16Insts () const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool hasD16Writes32BitVgpr () const
bool hasBF16TransInsts () const
bool hasBF16ConversionInsts () const
bool hasBF16PackedInsts () const
bool hasMadMixInsts () const
bool hasFP8ConversionScaleInsts () const
bool hasBF8ConversionScaleInsts () const
bool hasFP4ConversionScaleInsts () const
bool hasFP6BF6ConversionScaleInsts () const
bool hasF16BF16ToFP6BF6ConversionScaleInsts () const
bool hasCvtPkF16F32Inst () const
bool hasF32ToF16BF16ConversionSRInsts () const
bool hasMadMacF32Insts () const
bool hasDsSrc2Insts () const
bool hasSDWA () const
bool hasVOP3PInsts () const
bool hasMulI24 () const
bool hasMulU24 () const
bool hasSMulHi () const
bool hasInv2PiInlineImm () const
bool hasFminFmaxLegacy () const
bool hasTrigReducedRange () const
bool hasFastFMAF32 () const
bool isPromoteAllocaEnabled () const
unsigned getWavefrontSize () const
unsigned getWavefrontSizeLog2 () const
unsigned getLocalMemorySize () const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned getAddressableLocalMemorySize () const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getEUsPerCU () const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
Align getAlignmentForImplicitArgPtr () const
unsigned getExplicitKernelArgOffset () const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
virtual unsigned getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0
virtual unsigned getMinFlatWorkGroupSize () const =0
virtual unsigned getMaxFlatWorkGroupSize () const =0
virtual unsigned getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0
virtual unsigned getMinWavesPerEU () const =0
unsigned getMaxWavesPerEU () const
unsigned getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
SmallVector< unsigned > getMaxNumWorkGroups (const Function &F) const
Return the number of work groups for the function.
bool isSingleLaneExecution (const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
bool makeLIDRangeMetadata (Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getImplicitArgNumBytes (const Function &F) const
uint64_t getExplicitKernArgSize (const Function &F, Align &MaxAlign) const
unsigned getKernArgSegmentSize (const Function &F, Align &MaxAlign) const
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour () const
virtual ~AMDGPUSubtarget ()=default
Protected Attributes
bool GCN3Encoding = false
bool Has16BitInsts = false
bool HasTrue16BitInsts = false
bool HasFP8ConversionScaleInsts = false
bool HasBF8ConversionScaleInsts = false
bool HasFP4ConversionScaleInsts = false
bool HasFP6BF6ConversionScaleInsts = false
bool HasF16BF16ToFP6BF6ConversionScaleInsts = false
bool HasCvtPkF16F32Inst = false
bool HasF32ToF16BF16ConversionSRInsts = false
bool EnableRealTrue16Insts = false
bool EnableD16Writes32BitVgpr = false
bool HasBF16TransInsts = false
bool HasBF16ConversionInsts = false
bool HasBF16PackedInsts = false
bool HasMadMixInsts = false
bool HasMadMacF32Insts = false
bool HasDsSrc2Insts = false
bool HasSDWA = false
bool HasVOP3PInsts = false
bool HasMulI24 = true
bool HasMulU24 = true
bool HasSMulHi = false
bool HasInv2PiInlineImm = false
bool HasFminFmaxLegacy = true
bool EnablePromoteAlloca = false
bool HasTrigReducedRange = false
bool FastFMAF32 = false
unsigned EUsPerCU = 4
unsigned MaxWavesPerEU = 10
unsigned LocalMemorySize = 0
unsigned AddressableLocalMemorySize = 0
char WavefrontSizeLog2 = 0

Definition at line 30 of file AMDGPUSubtarget.h.

Generation

Enumerator
INVALID
R600
R700
EVERGREEN
NORTHERN_ISLANDS
SOUTHERN_ISLANDS
SEA_ISLANDS
VOLCANIC_ISLANDS
GFX9
GFX10
GFX11
GFX12

Definition at line 32 of file AMDGPUSubtarget.h.

AMDGPUSubtarget::AMDGPUSubtarget ( Triple TT )

~AMDGPUSubtarget()

virtual llvm::AMDGPUSubtarget::~AMDGPUSubtarget ( ) virtual default

get() [1/2]

get() [2/2]

getAddressableLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getAddressableLocalMemorySize ( ) const inline

Return the maximum number of bytes of LDS that can be allocated to a single workgroup.

For GFX10-GFX12 in WGP mode this is limited to 64k even though the WGP has 128k in total.

Definition at line 330 of file AMDGPUSubtarget.h.

References AddressableLocalMemorySize.

getAlignmentForImplicitArgPtr()

Align llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr ( ) const inline

getAMDGPUDwarfFlavour()

getDefaultFlatWorkGroupSize()

getEffectiveWavesPerEU()

getEUsPerCU()

unsigned llvm::AMDGPUSubtarget::getEUsPerCU ( ) const inline

getExplicitKernArgSize()

getExplicitKernelArgOffset()

unsigned llvm::AMDGPUSubtarget::getExplicitKernelArgOffset ( ) const inline

getFlatWorkGroupSizes()

Returns

Subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using "amdgpu-flat-work-group-size" attribute attached to function F.

Subtarget's default values if explicitly requested values cannot be converted to integer, or violate subtarget's specifications.

Definition at line 163 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerPairAttribute(), getMaxFlatWorkGroupSize(), and getMinFlatWorkGroupSize().

Referenced by getMaxLocalMemSizeWithWaveCount(), getMaxWorkitemID(), getOccupancyWithWorkGroupSizes(), getWavesPerEU(), and makeLIDRangeMetadata().

getImplicitArgNumBytes()

getKernArgSegmentSize()

getLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getLocalMemorySize ( ) const inline

getMaxFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize ( ) const pure virtual

getMaxLocalMemSizeWithWaveCount()

getMaxNumWorkGroups()

getMaxWavesPerEU()

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const inline

getMaxWorkGroupsPerCU()

virtual unsigned llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize) const pure virtual

getMaxWorkitemID()

getMinFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize ( ) const pure virtual

getMinWavesPerEU()

virtual unsigned llvm::AMDGPUSubtarget::getMinWavesPerEU ( ) const pure virtual

getOccupancyWithWorkGroupSizes() [1/3]

getOccupancyWithWorkGroupSizes() [2/3]

getOccupancyWithWorkGroupSizes() [3/3]

getReqdWorkGroupSize()

getWavefrontSize()

unsigned llvm::AMDGPUSubtarget::getWavefrontSize ( ) const inline

getWavefrontSizeLog2()

unsigned llvm::AMDGPUSubtarget::getWavefrontSizeLog2 ( ) const inline

getWavesPerEU() [1/3]

Returns

Subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using "amdgpu-waves-per-eu" attribute attached to function F.

Subtarget's default values if explicitly requested values cannot be converted to integer, violate subtarget's specifications, or are not compatible with minimum/maximum number of waves limited by flat work group size, register usage, and/or LDS usage.

Definition at line 213 of file AMDGPUSubtarget.cpp.

References F, getFlatWorkGroupSizes(), llvm::AMDGPU::getIntegerPairAttribute(), and getWavesPerEU().

Referenced by llvm::GCNSubtarget::getMaxNumSGPRs(), llvm::GCNSubtarget::getMaxNumVGPRs(), and getWavesPerEU().

getWavesPerEU() [2/3]

Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes().

References F.

getWavesPerEU() [3/3]

getWavesPerEUForWorkGroup()

virtual unsigned llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize) const pure virtual

has16BitInsts()

bool llvm::AMDGPUSubtarget::has16BitInsts ( ) const inline

hasBF16ConversionInsts()

bool llvm::AMDGPUSubtarget::hasBF16ConversionInsts ( ) const inline

hasBF16PackedInsts()

bool llvm::AMDGPUSubtarget::hasBF16PackedInsts ( ) const inline

hasBF16TransInsts()

bool llvm::AMDGPUSubtarget::hasBF16TransInsts ( ) const inline

hasBF8ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasBF8ConversionScaleInsts ( ) const inline

hasCvtPkF16F32Inst()

bool llvm::AMDGPUSubtarget::hasCvtPkF16F32Inst ( ) const inline

hasD16Writes32BitVgpr()

bool AMDGPUSubtarget::hasD16Writes32BitVgpr ( ) const

hasDsSrc2Insts()

bool llvm::AMDGPUSubtarget::hasDsSrc2Insts ( ) const inline

hasF16BF16ToFP6BF6ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasF16BF16ToFP6BF6ConversionScaleInsts ( ) const inline

hasF32ToF16BF16ConversionSRInsts()

bool llvm::AMDGPUSubtarget::hasF32ToF16BF16ConversionSRInsts ( ) const inline

hasFastFMAF32()

bool llvm::AMDGPUSubtarget::hasFastFMAF32 ( ) const inline

hasFminFmaxLegacy()

bool llvm::AMDGPUSubtarget::hasFminFmaxLegacy ( ) const inline

hasFP4ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP4ConversionScaleInsts ( ) const inline

hasFP6BF6ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP6BF6ConversionScaleInsts ( ) const inline

hasFP8ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP8ConversionScaleInsts ( ) const inline

hasInv2PiInlineImm()

bool llvm::AMDGPUSubtarget::hasInv2PiInlineImm ( ) const inline

hasMadMacF32Insts()

bool llvm::AMDGPUSubtarget::hasMadMacF32Insts ( ) const inline

hasMadMixInsts()

bool llvm::AMDGPUSubtarget::hasMadMixInsts ( ) const inline

hasMulI24()

bool llvm::AMDGPUSubtarget::hasMulI24 ( ) const inline

hasMulU24()

bool llvm::AMDGPUSubtarget::hasMulU24 ( ) const inline

hasSDWA()

bool llvm::AMDGPUSubtarget::hasSDWA ( ) const inline

hasSMulHi()

bool llvm::AMDGPUSubtarget::hasSMulHi ( ) const inline

hasTrigReducedRange()

bool llvm::AMDGPUSubtarget::hasTrigReducedRange ( ) const inline

hasTrue16BitInsts()

bool llvm::AMDGPUSubtarget::hasTrue16BitInsts ( ) const inline

hasVOP3PInsts()

bool llvm::AMDGPUSubtarget::hasVOP3PInsts ( ) const inline

hasWavefrontsEvenlySplittingXDim()

bool AMDGPUSubtarget::hasWavefrontsEvenlySplittingXDim ( const Function & F,
bool REquiresUniformYZ = false ) const

Returns

true if F will execute in a manner that leaves the X dimensions of the workitem ID evenly tiling wavefronts - that is, if X / wavefrontsize is uniform. This is true if either the Y and Z block dimensions are known to always be 1 or if the X dimension will always be a power of 2. If RequiresUniformYZ is true, it also ensures that the Y and Z workitem IDs will be uniform (so, while a (32, 2, 1) launch with wavesize64 would ordinarily pass this test, it won't with RequiresUniformYZ).

This information is currently only gathered from the !reqd_work_group_size metadata on F, but this may be improved in the future.

Definition at line 245 of file AMDGPUSubtarget.cpp.

References llvm::mdconst::extract(), F, getWavefrontSize(), and llvm::isPowerOf2_32().

isAmdHsaOrMesa()

isAmdHsaOS()

bool llvm::AMDGPUSubtarget::isAmdHsaOS ( ) const inline

isAmdPalOS()

bool llvm::AMDGPUSubtarget::isAmdPalOS ( ) const inline

isGCN()

bool llvm::AMDGPUSubtarget::isGCN ( ) const inline

isGCN3Encoding()

bool llvm::AMDGPUSubtarget::isGCN3Encoding ( ) const inline

isMesa3DOS()

bool llvm::AMDGPUSubtarget::isMesa3DOS ( ) const inline

isMesaKernel()

isPromoteAllocaEnabled()

bool llvm::AMDGPUSubtarget::isPromoteAllocaEnabled ( ) const inline

isSingleLaneExecution()

makeLIDRangeMetadata()

useRealTrue16Insts()

bool AMDGPUSubtarget::useRealTrue16Insts ( ) const

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.

Definition at line 37 of file AMDGPUSubtarget.cpp.

References EnableRealTrue16Insts, and hasTrue16BitInsts().

AddressableLocalMemorySize

unsigned llvm::AMDGPUSubtarget::AddressableLocalMemorySize = 0 protected

EnableD16Writes32BitVgpr

bool llvm::AMDGPUSubtarget::EnableD16Writes32BitVgpr = false protected

EnablePromoteAlloca

bool llvm::AMDGPUSubtarget::EnablePromoteAlloca = false protected

EnableRealTrue16Insts

bool llvm::AMDGPUSubtarget::EnableRealTrue16Insts = false protected

EUsPerCU

unsigned llvm::AMDGPUSubtarget::EUsPerCU = 4 protected

FastFMAF32

bool llvm::AMDGPUSubtarget::FastFMAF32 = false protected

GCN3Encoding

bool llvm::AMDGPUSubtarget::GCN3Encoding = false protected

Has16BitInsts

bool llvm::AMDGPUSubtarget::Has16BitInsts = false protected

HasBF16ConversionInsts

bool llvm::AMDGPUSubtarget::HasBF16ConversionInsts = false protected

HasBF16PackedInsts

bool llvm::AMDGPUSubtarget::HasBF16PackedInsts = false protected

HasBF16TransInsts

bool llvm::AMDGPUSubtarget::HasBF16TransInsts = false protected

HasBF8ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasBF8ConversionScaleInsts = false protected

HasCvtPkF16F32Inst

bool llvm::AMDGPUSubtarget::HasCvtPkF16F32Inst = false protected

HasDsSrc2Insts

bool llvm::AMDGPUSubtarget::HasDsSrc2Insts = false protected

HasF16BF16ToFP6BF6ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasF16BF16ToFP6BF6ConversionScaleInsts = false protected

HasF32ToF16BF16ConversionSRInsts

bool llvm::AMDGPUSubtarget::HasF32ToF16BF16ConversionSRInsts = false protected

HasFminFmaxLegacy

bool llvm::AMDGPUSubtarget::HasFminFmaxLegacy = true protected

HasFP4ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP4ConversionScaleInsts = false protected

HasFP6BF6ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP6BF6ConversionScaleInsts = false protected

HasFP8ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP8ConversionScaleInsts = false protected

HasInv2PiInlineImm

bool llvm::AMDGPUSubtarget::HasInv2PiInlineImm = false protected

HasMadMacF32Insts

bool llvm::AMDGPUSubtarget::HasMadMacF32Insts = false protected

HasMadMixInsts

bool llvm::AMDGPUSubtarget::HasMadMixInsts = false protected

HasMulI24

bool llvm::AMDGPUSubtarget::HasMulI24 = true protected

HasMulU24

bool llvm::AMDGPUSubtarget::HasMulU24 = true protected

HasSDWA

bool llvm::AMDGPUSubtarget::HasSDWA = false protected

HasSMulHi

bool llvm::AMDGPUSubtarget::HasSMulHi = false protected

HasTrigReducedRange

bool llvm::AMDGPUSubtarget::HasTrigReducedRange = false protected

HasTrue16BitInsts

bool llvm::AMDGPUSubtarget::HasTrue16BitInsts = false protected

HasVOP3PInsts

bool llvm::AMDGPUSubtarget::HasVOP3PInsts = false protected

LocalMemorySize

unsigned llvm::AMDGPUSubtarget::LocalMemorySize = 0 protected

MaxWavesPerEU

unsigned llvm::AMDGPUSubtarget::MaxWavesPerEU = 10 protected

WavefrontSizeLog2

char llvm::AMDGPUSubtarget::WavefrontSizeLog2 = 0 protected

The documentation for this class was generated from the following files: