LLVM: llvm::AMDGPUSubtarget Class Reference (original) (raw)

#include "[Target/AMDGPU/AMDGPUSubtarget.h](AMDGPUSubtarget%5F8h%5Fsource.html)"

Public Types
enum	Generation { INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 , NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 , GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 }

Public Member Functions
	AMDGPUSubtarget (Triple TT)
std::pair< unsigned, unsigned >	getDefaultFlatWorkGroupSize (CallingConv::ID CC) const
std::pair< unsigned, unsigned >	getFlatWorkGroupSizes (const Function &F) const
std::optional< unsigned >	getReqdWorkGroupSize (const Function &F, unsigned Dim) const
bool	hasWavefrontsEvenlySplittingXDim (const Function &F, bool REquiresUniformYZ=false) const
std::pair< unsigned, unsigned >	getWavesPerEU (const Function &F) const
std::pair< unsigned, unsigned >	getWavesPerEU (const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
	Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned >	getWavesPerEU (std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes, const Function &F) const
	Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.
std::pair< unsigned, unsigned >	getEffectiveWavesPerEU (std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
	Returns the target minimum/maximum number of waves per EU.
unsigned	getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
	Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F) const
	Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
	Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned >	getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const
	Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.
bool	isAmdHsaOS () const
bool	isAmdPalOS () const
bool	isMesa3DOS () const
bool	isMesaKernel (const Function &F) const
bool	isAmdHsaOrMesa (const Function &F) const
bool	isGCN () const
bool	isGCN3Encoding () const
bool	has16BitInsts () const
bool	hasTrue16BitInsts () const
	Return true if the subtarget supports True16 instructions.
bool	useRealTrue16Insts () const
	Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool	hasD16Writes32BitVgpr () const
bool	hasBF16TransInsts () const
bool	hasBF16ConversionInsts () const
bool	hasBF16PackedInsts () const
bool	hasMadMixInsts () const
bool	hasFP8ConversionScaleInsts () const
bool	hasBF8ConversionScaleInsts () const
bool	hasFP4ConversionScaleInsts () const
bool	hasFP6BF6ConversionScaleInsts () const
bool	hasF16BF16ToFP6BF6ConversionScaleInsts () const
bool	hasCvtPkF16F32Inst () const
bool	hasF32ToF16BF16ConversionSRInsts () const
bool	hasMadMacF32Insts () const
bool	hasDsSrc2Insts () const
bool	hasSDWA () const
bool	hasVOP3PInsts () const
bool	hasMulI24 () const
bool	hasMulU24 () const
bool	hasSMulHi () const
bool	hasInv2PiInlineImm () const
bool	hasFminFmaxLegacy () const
bool	hasTrigReducedRange () const
bool	hasFastFMAF32 () const
bool	isPromoteAllocaEnabled () const
unsigned	getWavefrontSize () const
unsigned	getWavefrontSizeLog2 () const
unsigned	getLocalMemorySize () const
	Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned	getAddressableLocalMemorySize () const
	Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned	getEUsPerCU () const
	Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
Align	getAlignmentForImplicitArgPtr () const
unsigned	getExplicitKernelArgOffset () const
	Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
virtual unsigned	getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0
virtual unsigned	getMinFlatWorkGroupSize () const =0
virtual unsigned	getMaxFlatWorkGroupSize () const =0
virtual unsigned	getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0
virtual unsigned	getMinWavesPerEU () const =0
unsigned	getMaxWavesPerEU () const
unsigned	getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
	Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
SmallVector< unsigned >	getMaxNumWorkGroups (const Function &F) const
	Return the number of work groups for the function.
bool	isSingleLaneExecution (const Function &Kernel) const
	Return true if only a single workitem can be active in a wave.
bool	makeLIDRangeMetadata (Instruction *I) const
	Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned	getImplicitArgNumBytes (const Function &F) const
uint64_t	getExplicitKernArgSize (const Function &F, Align &MaxAlign) const
unsigned	getKernArgSegmentSize (const Function &F, Align &MaxAlign) const
AMDGPUDwarfFlavour	getAMDGPUDwarfFlavour () const
virtual	~AMDGPUSubtarget ()=default

Protected Attributes
bool	GCN3Encoding = false
bool	Has16BitInsts = false
bool	HasTrue16BitInsts = false
bool	HasFP8ConversionScaleInsts = false
bool	HasBF8ConversionScaleInsts = false
bool	HasFP4ConversionScaleInsts = false
bool	HasFP6BF6ConversionScaleInsts = false
bool	HasF16BF16ToFP6BF6ConversionScaleInsts = false
bool	HasCvtPkF16F32Inst = false
bool	HasF32ToF16BF16ConversionSRInsts = false
bool	EnableRealTrue16Insts = false
bool	EnableD16Writes32BitVgpr = false
bool	HasBF16TransInsts = false
bool	HasBF16ConversionInsts = false
bool	HasBF16PackedInsts = false
bool	HasMadMixInsts = false
bool	HasMadMacF32Insts = false
bool	HasDsSrc2Insts = false
bool	HasSDWA = false
bool	HasVOP3PInsts = false
bool	HasMulI24 = true
bool	HasMulU24 = true
bool	HasSMulHi = false
bool	HasInv2PiInlineImm = false
bool	HasFminFmaxLegacy = true
bool	EnablePromoteAlloca = false
bool	HasTrigReducedRange = false
bool	FastFMAF32 = false
unsigned	EUsPerCU = 4
unsigned	MaxWavesPerEU = 10
unsigned	LocalMemorySize = 0
unsigned	AddressableLocalMemorySize = 0
char	WavefrontSizeLog2 = 0

Definition at line 30 of file AMDGPUSubtarget.h.

◆ Generation

Enumerator
INVALID
R600
R700
EVERGREEN
NORTHERN_ISLANDS
SOUTHERN_ISLANDS
SEA_ISLANDS
VOLCANIC_ISLANDS
GFX9
GFX10
GFX11
GFX12

Definition at line 32 of file AMDGPUSubtarget.h.

AMDGPUSubtarget::AMDGPUSubtarget	(	Triple	TT	)

◆ ~AMDGPUSubtarget()

virtual llvm::AMDGPUSubtarget::~AMDGPUSubtarget ( )	virtual default

◆ get() [1/2]

◆ get() [2/2]

◆ getAddressableLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getAddressableLocalMemorySize ( ) const	inline

Return the maximum number of bytes of LDS that can be allocated to a single workgroup.

For GFX10-GFX12 in WGP mode this is limited to 64k even though the WGP has 128k in total.

Definition at line 330 of file AMDGPUSubtarget.h.

References AddressableLocalMemorySize.

◆ getAlignmentForImplicitArgPtr()

Align llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr ( ) const	inline

◆ getAMDGPUDwarfFlavour()

◆ getDefaultFlatWorkGroupSize()

◆ getEffectiveWavesPerEU()

◆ getEUsPerCU()

unsigned llvm::AMDGPUSubtarget::getEUsPerCU ( ) const	inline

◆ getExplicitKernArgSize()

◆ getExplicitKernelArgOffset()

unsigned llvm::AMDGPUSubtarget::getExplicitKernelArgOffset ( ) const	inline

◆ getFlatWorkGroupSizes()

Returns

Subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using "amdgpu-flat-work-group-size" attribute attached to function F.

Subtarget's default values if explicitly requested values cannot be converted to integer, or violate subtarget's specifications.

Definition at line 163 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerPairAttribute(), getMaxFlatWorkGroupSize(), and getMinFlatWorkGroupSize().

Referenced by getMaxLocalMemSizeWithWaveCount(), getMaxWorkitemID(), getOccupancyWithWorkGroupSizes(), getWavesPerEU(), and makeLIDRangeMetadata().

◆ getImplicitArgNumBytes()

◆ getKernArgSegmentSize()

◆ getLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getLocalMemorySize ( ) const	inline

◆ getMaxFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize ( ) const	pure virtual

◆ getMaxLocalMemSizeWithWaveCount()

◆ getMaxNumWorkGroups()

◆ getMaxWavesPerEU()

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const	inline

◆ getMaxWorkGroupsPerCU()

virtual unsigned llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize) const	pure virtual

◆ getMaxWorkitemID()

◆ getMinFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize ( ) const	pure virtual

◆ getMinWavesPerEU()

virtual unsigned llvm::AMDGPUSubtarget::getMinWavesPerEU ( ) const	pure virtual

◆ getOccupancyWithWorkGroupSizes() [1/3]

◆ getOccupancyWithWorkGroupSizes() [2/3]

◆ getOccupancyWithWorkGroupSizes() [3/3]

◆ getReqdWorkGroupSize()

◆ getWavefrontSize()

unsigned llvm::AMDGPUSubtarget::getWavefrontSize ( ) const	inline

◆ getWavefrontSizeLog2()

unsigned llvm::AMDGPUSubtarget::getWavefrontSizeLog2 ( ) const	inline

◆ getWavesPerEU() [1/3]

Returns

Subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using "amdgpu-waves-per-eu" attribute attached to function F.

Subtarget's default values if explicitly requested values cannot be converted to integer, violate subtarget's specifications, or are not compatible with minimum/maximum number of waves limited by flat work group size, register usage, and/or LDS usage.

Definition at line 213 of file AMDGPUSubtarget.cpp.

References F, getFlatWorkGroupSizes(), llvm::AMDGPU::getIntegerPairAttribute(), and getWavesPerEU().

Referenced by llvm::GCNSubtarget::getMaxNumSGPRs(), llvm::GCNSubtarget::getMaxNumVGPRs(), and getWavesPerEU().

◆ getWavesPerEU() [2/3]

Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes.

References F.

◆ getWavesPerEU() [3/3]

◆ getWavesPerEUForWorkGroup()

virtual unsigned llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize) const	pure virtual

◆ has16BitInsts()

bool llvm::AMDGPUSubtarget::has16BitInsts ( ) const	inline

◆ hasBF16ConversionInsts()

bool llvm::AMDGPUSubtarget::hasBF16ConversionInsts ( ) const	inline

◆ hasBF16PackedInsts()

bool llvm::AMDGPUSubtarget::hasBF16PackedInsts ( ) const	inline

◆ hasBF16TransInsts()

bool llvm::AMDGPUSubtarget::hasBF16TransInsts ( ) const	inline

◆ hasBF8ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasBF8ConversionScaleInsts ( ) const	inline

◆ hasCvtPkF16F32Inst()

bool llvm::AMDGPUSubtarget::hasCvtPkF16F32Inst ( ) const	inline

◆ hasD16Writes32BitVgpr()

bool AMDGPUSubtarget::hasD16Writes32BitVgpr	(	)	const

◆ hasDsSrc2Insts()

bool llvm::AMDGPUSubtarget::hasDsSrc2Insts ( ) const	inline

◆ hasF16BF16ToFP6BF6ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasF16BF16ToFP6BF6ConversionScaleInsts ( ) const	inline

◆ hasF32ToF16BF16ConversionSRInsts()

bool llvm::AMDGPUSubtarget::hasF32ToF16BF16ConversionSRInsts ( ) const	inline

◆ hasFastFMAF32()

bool llvm::AMDGPUSubtarget::hasFastFMAF32 ( ) const	inline

◆ hasFminFmaxLegacy()

bool llvm::AMDGPUSubtarget::hasFminFmaxLegacy ( ) const	inline

◆ hasFP4ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP4ConversionScaleInsts ( ) const	inline

◆ hasFP6BF6ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP6BF6ConversionScaleInsts ( ) const	inline

◆ hasFP8ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP8ConversionScaleInsts ( ) const	inline

◆ hasInv2PiInlineImm()

bool llvm::AMDGPUSubtarget::hasInv2PiInlineImm ( ) const	inline

◆ hasMadMacF32Insts()

bool llvm::AMDGPUSubtarget::hasMadMacF32Insts ( ) const	inline

◆ hasMadMixInsts()

bool llvm::AMDGPUSubtarget::hasMadMixInsts ( ) const	inline

◆ hasMulI24()

bool llvm::AMDGPUSubtarget::hasMulI24 ( ) const	inline

◆ hasMulU24()

bool llvm::AMDGPUSubtarget::hasMulU24 ( ) const	inline

◆ hasSDWA()

bool llvm::AMDGPUSubtarget::hasSDWA ( ) const	inline

◆ hasSMulHi()

bool llvm::AMDGPUSubtarget::hasSMulHi ( ) const	inline

◆ hasTrigReducedRange()

bool llvm::AMDGPUSubtarget::hasTrigReducedRange ( ) const	inline

◆ hasTrue16BitInsts()

bool llvm::AMDGPUSubtarget::hasTrue16BitInsts ( ) const	inline

◆ hasVOP3PInsts()

bool llvm::AMDGPUSubtarget::hasVOP3PInsts ( ) const	inline

◆ hasWavefrontsEvenlySplittingXDim()

bool AMDGPUSubtarget::hasWavefrontsEvenlySplittingXDim	(	const Function &	F,
bool	REquiresUniformYZ = false ) const

Returns

true if F will execute in a manner that leaves the X dimensions of the workitem ID evenly tiling wavefronts - that is, if X / wavefrontsize is uniform. This is true if either the Y and Z block dimensions are known to always be 1 or if the X dimension will always be a power of 2. If RequiresUniformYZ is true, it also ensures that the Y and Z workitem IDs will be uniform (so, while a (32, 2, 1) launch with wavesize64 would ordinarily pass this test, it won't with RequiresUniformYZ).

This information is currently only gathered from the !reqd_work_group_size metadata on F, but this may be improved in the future.

Definition at line 245 of file AMDGPUSubtarget.cpp.

References llvm::mdconst::extract(), F, getWavefrontSize(), and llvm::isPowerOf2_32().

◆ isAmdHsaOrMesa()

◆ isAmdHsaOS()

bool llvm::AMDGPUSubtarget::isAmdHsaOS ( ) const	inline

◆ isAmdPalOS()

bool llvm::AMDGPUSubtarget::isAmdPalOS ( ) const	inline

◆ isGCN()

bool llvm::AMDGPUSubtarget::isGCN ( ) const	inline

◆ isGCN3Encoding()

bool llvm::AMDGPUSubtarget::isGCN3Encoding ( ) const	inline

◆ isMesa3DOS()

bool llvm::AMDGPUSubtarget::isMesa3DOS ( ) const	inline

◆ isMesaKernel()

◆ isPromoteAllocaEnabled()

bool llvm::AMDGPUSubtarget::isPromoteAllocaEnabled ( ) const	inline

◆ isSingleLaneExecution()

◆ makeLIDRangeMetadata()

◆ useRealTrue16Insts()

bool AMDGPUSubtarget::useRealTrue16Insts	(	)	const

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.

Definition at line 37 of file AMDGPUSubtarget.cpp.

References EnableRealTrue16Insts, and hasTrue16BitInsts().

◆ AddressableLocalMemorySize

unsigned llvm::AMDGPUSubtarget::AddressableLocalMemorySize = 0	protected

◆ EnableD16Writes32BitVgpr

bool llvm::AMDGPUSubtarget::EnableD16Writes32BitVgpr = false	protected

◆ EnablePromoteAlloca

bool llvm::AMDGPUSubtarget::EnablePromoteAlloca = false	protected

◆ EnableRealTrue16Insts

bool llvm::AMDGPUSubtarget::EnableRealTrue16Insts = false	protected

◆ EUsPerCU

unsigned llvm::AMDGPUSubtarget::EUsPerCU = 4	protected

◆ FastFMAF32

bool llvm::AMDGPUSubtarget::FastFMAF32 = false	protected

◆ GCN3Encoding

bool llvm::AMDGPUSubtarget::GCN3Encoding = false	protected

◆ Has16BitInsts

bool llvm::AMDGPUSubtarget::Has16BitInsts = false	protected

◆ HasBF16ConversionInsts

bool llvm::AMDGPUSubtarget::HasBF16ConversionInsts = false	protected

◆ HasBF16PackedInsts

bool llvm::AMDGPUSubtarget::HasBF16PackedInsts = false	protected

◆ HasBF16TransInsts

bool llvm::AMDGPUSubtarget::HasBF16TransInsts = false	protected

◆ HasBF8ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasBF8ConversionScaleInsts = false	protected

◆ HasCvtPkF16F32Inst

bool llvm::AMDGPUSubtarget::HasCvtPkF16F32Inst = false	protected

◆ HasDsSrc2Insts

bool llvm::AMDGPUSubtarget::HasDsSrc2Insts = false	protected

◆ HasF16BF16ToFP6BF6ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasF16BF16ToFP6BF6ConversionScaleInsts = false	protected

◆ HasF32ToF16BF16ConversionSRInsts

bool llvm::AMDGPUSubtarget::HasF32ToF16BF16ConversionSRInsts = false	protected

◆ HasFminFmaxLegacy

bool llvm::AMDGPUSubtarget::HasFminFmaxLegacy = true	protected

◆ HasFP4ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP4ConversionScaleInsts = false	protected

◆ HasFP6BF6ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP6BF6ConversionScaleInsts = false	protected

◆ HasFP8ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP8ConversionScaleInsts = false	protected

◆ HasInv2PiInlineImm

bool llvm::AMDGPUSubtarget::HasInv2PiInlineImm = false	protected

◆ HasMadMacF32Insts

bool llvm::AMDGPUSubtarget::HasMadMacF32Insts = false	protected

◆ HasMadMixInsts

bool llvm::AMDGPUSubtarget::HasMadMixInsts = false	protected

◆ HasMulI24

bool llvm::AMDGPUSubtarget::HasMulI24 = true	protected

◆ HasMulU24

bool llvm::AMDGPUSubtarget::HasMulU24 = true	protected

◆ HasSDWA

bool llvm::AMDGPUSubtarget::HasSDWA = false	protected

◆ HasSMulHi

bool llvm::AMDGPUSubtarget::HasSMulHi = false	protected

◆ HasTrigReducedRange

bool llvm::AMDGPUSubtarget::HasTrigReducedRange = false	protected

◆ HasTrue16BitInsts

bool llvm::AMDGPUSubtarget::HasTrue16BitInsts = false	protected

◆ HasVOP3PInsts

bool llvm::AMDGPUSubtarget::HasVOP3PInsts = false	protected

◆ LocalMemorySize

unsigned llvm::AMDGPUSubtarget::LocalMemorySize = 0	protected

◆ MaxWavesPerEU

unsigned llvm::AMDGPUSubtarget::MaxWavesPerEU = 10	protected

◆ WavefrontSizeLog2

char llvm::AMDGPUSubtarget::WavefrontSizeLog2 = 0	protected

The documentation for this class was generated from the following files:

lib/Target/AMDGPU/AMDGPUSubtarget.h
lib/Target/AMDGPU/AMDGPUSubtarget.cpp