AMDGPULegalizerInfo.cpp File Reference (original) (raw)

This file implements the targeting of the MachineLegalizer class for AMDGPU. More...

Functions
static LLT	getPow2VectorType (LLT Ty)

static LLT	getPow2ScalarType (LLT Ty)

static LegalityPredicate	isSmallOddVector (unsigned TypeIdx)

static LegalityPredicate	sizeIsMultipleOf32 (unsigned TypeIdx)

static LegalityPredicate	isWideVec16 (unsigned TypeIdx)

static LegalizeMutation	oneMoreElement (unsigned TypeIdx)

static LegalizeMutation	fewerEltsToSize64Vector (unsigned TypeIdx)

static LegalizeMutation	moreEltsToNext32Bit (unsigned TypeIdx)

static LegalizeMutation	moreElementsToNextExistingRegClass (unsigned TypeIdx)

static LLT	getBufferRsrcScalarType (const LLT Ty)

static LLT	getBufferRsrcRegisterType (const LLT Ty)

static LLT	getBitcastRegisterType (const LLT Ty)

static LegalizeMutation	bitcastToRegisterType (unsigned TypeIdx)

static LegalizeMutation	bitcastToVectorElement32 (unsigned TypeIdx)

static LegalityPredicate	vectorSmallerThan (unsigned TypeIdx, unsigned Size)

static LegalityPredicate	vectorWiderThan (unsigned TypeIdx, unsigned Size)

static LegalityPredicate	numElementsNotEven (unsigned TypeIdx)

static bool	isRegisterSize (unsigned Size)

static bool	isRegisterVectorElementType (LLT EltTy)

static bool	isRegisterVectorType (LLT Ty)

static bool	isRegisterType (LLT Ty)

static LegalityPredicate	isRegisterType (unsigned TypeIdx)

static LegalityPredicate	isIllegalRegisterType (unsigned TypeIdx)

static LegalityPredicate	elementTypeIsLegal (unsigned TypeIdx)

static bool	isRegisterClassType (LLT Ty)

static LegalityPredicate	isRegisterClassType (unsigned TypeIdx)

static LegalityPredicate	isWideScalarExtLoadTruncStore (unsigned TypeIdx)

static unsigned	maxSizeForAddrSpace (const GCNSubtarget &ST, unsigned AS, bool IsLoad, bool IsAtomic)

static bool	isLoadStoreSizeLegal (const GCNSubtarget &ST, const LegalityQuery &Query)

static bool	hasBufferRsrcWorkaround (const LLT Ty)

static bool	loadStoreBitcastWorkaround (const LLT Ty)

static bool	isLoadStoreLegal (const GCNSubtarget &ST, const LegalityQuery &Query)

static bool	shouldBitcastLoadStoreType (const GCNSubtarget &ST, const LLT Ty, const LLT MemTy)
	Return true if a load or store of the type should be lowered with a bitcast to a different type.

static bool	shouldWidenLoad (const GCNSubtarget &ST, LLT MemoryTy, uint64_t AlignInBits, unsigned AddrSpace, unsigned Opcode)
	Return true if we should legalize a load by widening an odd sized memory access up to the alignment.

static bool	shouldWidenLoad (const GCNSubtarget &ST, const LegalityQuery &Query, unsigned Opcode)

static LLT	castBufferRsrcFromV4I32 (MachineInstr &MI, MachineIRBuilder &B, MachineRegisterInfo &MRI, unsigned Idx)
	Mutates IR (typically a load instruction) to use a <4 x s32> as the initial type of the operand Idx and then to transform it to a p8 via bitcasts and inttoptr.

static Register	castBufferRsrcToV4I32 (Register Pointer, MachineIRBuilder &B)
	Cast a buffer resource (an address space 8 pointer) into a 4xi32, which is the form in which the value must be in order to be passed to the low-level representations used for MUBUF/MTBUF intrinsics.

static void	castBufferRsrcArgToV4I32 (MachineInstr &MI, MachineIRBuilder &B, unsigned Idx)

static bool	isKnownNonNull (Register Val, MachineRegisterInfo &MRI, const AMDGPUTargetMachine &TM, unsigned AddrSpace)
	Return true if the value is a known valid address, such that a null check is not necessary.

static MachineInstrBuilder	extractF64Exponent (Register Hi, MachineIRBuilder &B)

static LLT	widenToNextPowerOf2 (LLT Ty)

static bool	valueIsKnownNeverF32Denorm (const MachineRegisterInfo &MRI, Register Src)
	Return true if it's known that Src can never be an f32 denormal value.

static bool	allowApproxFunc (const MachineFunction &MF, unsigned Flags)

static bool	needsDenormHandlingF32 (const MachineFunction &MF, Register Src, unsigned Flags)

static Register	getMad (MachineIRBuilder &B, LLT Ty, Register X, Register Y, Register Z, unsigned Flags)

static Register	stripAnySourceMods (Register OrigSrc, MachineRegisterInfo &MRI)

static bool	isNot (const MachineRegisterInfo &MRI, const MachineInstr &MI)

static MachineInstr *	verifyCFIntrinsic (MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr &Br, MachineBasicBlock &UncondBrTarget, bool &Negated)

static bool	replaceWithConstant (MachineIRBuilder &B, MachineInstr &MI, int64_t C)

static std::pair< Register, Register >	emitReciprocalU64 (MachineIRBuilder &B, Register Val)

static void	toggleSPDenormMode (bool Enable, MachineIRBuilder &B, const GCNSubtarget &ST, SIModeRegisterDefaults Mode)

static void	buildBufferLoad (unsigned Opc, Register LoadDstReg, Register RSrc, Register VIndex, Register VOffset, Register SOffset, unsigned ImmOffset, unsigned Format, unsigned AuxiliaryData, MachineMemOperand *MMO, bool IsTyped, bool HasVIndex, MachineIRBuilder &B)

static unsigned	getBufferAtomicPseudo (Intrinsic::ID IntrID)

static void	packImage16bitOpsToDwords (MachineIRBuilder &B, MachineInstr &MI, SmallVectorImpl< Register > &PackedAddrs, unsigned ArgOffset, const AMDGPU::ImageDimIntrinsicInfo *Intr, bool IsA16, bool IsG16)
	Turn a set of s16 typed registers in AddrRegs into a dword sized vector with s16 typed elements.

static void	convertImageAddrToPacked (MachineIRBuilder &B, MachineInstr &MI, int DimIdx, int NumVAddrs)
	Convert from separate vaddr components to a single vector address register, and replace the remaining operands with $noreg.

Variables
static cl::opt< bool >	EnableNewLegality ("amdgpu-global-isel-new-legality", cl::desc("Use GlobalISel desired legality, rather than try to use" "rules compatible with selection patterns"), cl::init(false), cl::ReallyHidden)

static constexpr unsigned	MaxRegisterSize = 1024

static const LLT	S1 = LLT::scalar(1)

static const LLT	S8 = LLT::scalar(8)

static const LLT	S16 = LLT::scalar(16)

static const LLT	S32 = LLT::scalar(32)

static const LLT	F32 = LLT::float32()

static const LLT	S64 = LLT::scalar(64)

static const LLT	F64 = LLT::float64()

static const LLT	S96 = LLT::scalar(96)

static const LLT	S128 = LLT::scalar(128)

static const LLT	S160 = LLT::scalar(160)

static const LLT	S192 = LLT::scalar(192)

static const LLT	S224 = LLT::scalar(224)

static const LLT	S256 = LLT::scalar(256)

static const LLT	S512 = LLT::scalar(512)

static const LLT	S1024 = LLT::scalar(1024)

static const LLT	MaxScalar = LLT::scalar(MaxRegisterSize)

static const LLT	V2S8 = LLT::fixed_vector(2, 8)

static const LLT	V2S16 = LLT::fixed_vector(2, 16)

static const LLT	V4S16 = LLT::fixed_vector(4, 16)

static const LLT	V6S16 = LLT::fixed_vector(6, 16)

static const LLT	V8S16 = LLT::fixed_vector(8, 16)

static const LLT	V10S16 = LLT::fixed_vector(10, 16)

static const LLT	V12S16 = LLT::fixed_vector(12, 16)

static const LLT	V16S16 = LLT::fixed_vector(16, 16)

static const LLT	V2F16 = LLT::fixed_vector(2, LLT::float16())

static const LLT	V2BF16 = V2F16

static const LLT	V2S32 = LLT::fixed_vector(2, 32)

static const LLT	V3S32 = LLT::fixed_vector(3, 32)

static const LLT	V4S32 = LLT::fixed_vector(4, 32)

static const LLT	V5S32 = LLT::fixed_vector(5, 32)

static const LLT	V6S32 = LLT::fixed_vector(6, 32)

static const LLT	V7S32 = LLT::fixed_vector(7, 32)

static const LLT	V8S32 = LLT::fixed_vector(8, 32)

static const LLT	V9S32 = LLT::fixed_vector(9, 32)

static const LLT	V10S32 = LLT::fixed_vector(10, 32)

static const LLT	V11S32 = LLT::fixed_vector(11, 32)

static const LLT	V12S32 = LLT::fixed_vector(12, 32)

static const LLT	V16S32 = LLT::fixed_vector(16, 32)

static const LLT	V32S32 = LLT::fixed_vector(32, 32)

static const LLT	V2S64 = LLT::fixed_vector(2, 64)

static const LLT	V3S64 = LLT::fixed_vector(3, 64)

static const LLT	V4S64 = LLT::fixed_vector(4, 64)

static const LLT	V5S64 = LLT::fixed_vector(5, 64)

static const LLT	V6S64 = LLT::fixed_vector(6, 64)

static const LLT	V7S64 = LLT::fixed_vector(7, 64)

static const LLT	V8S64 = LLT::fixed_vector(8, 64)

static const LLT	V16S64 = LLT::fixed_vector(16, 64)

static const LLT	V2S128 = LLT::fixed_vector(2, 128)

static const LLT	V4S128 = LLT::fixed_vector(4, 128)

static std::initializer_list< LLT >	AllScalarTypes

static std::initializer_list< LLT >	AllS16Vectors

static std::initializer_list< LLT >	AllS32Vectors

static std::initializer_list< LLT >	AllS64Vectors

static constexpr unsigned	SPDenormModeBitField

static constexpr unsigned	FPEnvModeBitField

static constexpr unsigned	FPEnvTrapBitField

This file implements the targeting of the MachineLegalizer class for AMDGPU.

Todo:

This should be generated by TableGen.

Definition in file AMDGPULegalizerInfo.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE "amdgpu-legalinfo"

◆ allowApproxFunc()

◆ bitcastToRegisterType()

◆ bitcastToVectorElement32()

◆ buildBufferLoad()

static void buildBufferLoad ( unsigned Opc, Register LoadDstReg, Register RSrc, Register VIndex, Register VOffset, Register SOffset, unsigned ImmOffset, unsigned Format, unsigned AuxiliaryData, MachineMemOperand * MMO, bool IsTyped, bool HasVIndex, MachineIRBuilder & B )	static

◆ castBufferRsrcArgToV4I32()

Definition at line 645 of file AMDGPULegalizerInfo.cpp.

References B, castBufferRsrcToV4I32(), llvm::MachineOperand::getReg(), hasBufferRsrcWorkaround(), Idx, MI, and llvm::MachineOperand::setReg().

Referenced by llvm::AMDGPULegalizerInfo::legalizeBufferAtomic(), llvm::AMDGPULegalizerInfo::legalizeBufferLoad(), llvm::AMDGPULegalizerInfo::legalizeBufferStore(), llvm::AMDGPULegalizerInfo::legalizeSBufferPrefetch(), and llvm::AMDGPULegalizerInfo::legalizeStore().

◆ castBufferRsrcFromV4I32()

Mutates IR (typically a load instruction) to use a <4 x s32> as the initial type of the operand Idx and then to transform it to a p8 via bitcasts and inttoptr.

In addition, handle vectors of p8. Returns the new type.

Definition at line 585 of file AMDGPULegalizerInfo.cpp.

References B, getBufferRsrcRegisterType(), getBufferRsrcScalarType(), llvm::MachineOperand::getReg(), hasBufferRsrcWorkaround(), I, Idx, MI, MRI, S32, llvm::LLT::scalar(), and llvm::MachineOperand::setReg().

Referenced by llvm::AMDGPULegalizerInfo::legalizeBufferLoad(), llvm::AMDGPULegalizerInfo::legalizeLoad(), and llvm::AMDGPULegalizerInfo::legalizeSBufferLoad().

◆ castBufferRsrcToV4I32()

Cast a buffer resource (an address space 8 pointer) into a 4xi32, which is the form in which the value must be in order to be passed to the low-level representations used for MUBUF/MTBUF intrinsics.

This is a hack, which is needed in order to account for the fact that we can't define a register class for s128 without breaking SelectionDAG.

Definition at line 626 of file AMDGPULegalizerInfo.cpp.

References B, getBufferRsrcRegisterType(), getBufferRsrcScalarType(), I, MRI, llvm::SmallVectorTemplateBase< T, bool >::push_back(), and llvm::LLT::scalar().

Referenced by castBufferRsrcArgToV4I32(), and llvm::AMDGPULegalizerInfo::fixStoreSourceType().

◆ convertImageAddrToPacked()

Convert from separate vaddr components to a single vector address register, and replace the remaining operands with $noreg.

Definition at line 6359 of file AMDGPULegalizerInfo.cpp.

References assert(), B, llvm::LLT::fixed_vector(), llvm::SrcOp::getReg(), I, MI, llvm::SmallVectorTemplateBase< T, bool >::push_back(), S32, llvm::LLT::scalar(), and llvm::SmallVectorBase< Size_T >::size().

Referenced by llvm::AMDGPULegalizerInfo::legalizeImageIntrinsic().

◆ elementTypeIsLegal()

◆ emitReciprocalU64()

◆ extractF64Exponent()

◆ fewerEltsToSize64Vector()

◆ getBitcastRegisterType()

static LLT getBitcastRegisterType ( const LLT Ty)	static

◆ getBufferAtomicPseudo()

◆ getBufferRsrcRegisterType()

static LLT getBufferRsrcRegisterType ( const LLT Ty)	static

◆ getBufferRsrcScalarType()

static LLT getBufferRsrcScalarType ( const LLT Ty)	static

◆ getMad()

◆ getPow2ScalarType()

static LLT getPow2ScalarType ( LLT Ty)	static

◆ getPow2VectorType()

static LLT getPow2VectorType ( LLT Ty)	static

◆ hasBufferRsrcWorkaround()

Definition at line 478 of file AMDGPULegalizerInfo.cpp.

References llvm::AMDGPUAS::BUFFER_RESOURCE, llvm::LLT::getAddressSpace(), llvm::LLT::getElementType(), hasBufferRsrcWorkaround(), llvm::LLT::isPointer(), and llvm::LLT::isVector().

Referenced by castBufferRsrcArgToV4I32(), castBufferRsrcFromV4I32(), llvm::AMDGPULegalizerInfo::fixStoreSourceType(), hasBufferRsrcWorkaround(), isLoadStoreLegal(), llvm::AMDGPULegalizerInfo::legalizeBufferLoad(), llvm::AMDGPULegalizerInfo::legalizeLoad(), llvm::AMDGPULegalizerInfo::legalizeSBufferLoad(), llvm::AMDGPULegalizerInfo::legalizeStore(), and loadStoreBitcastWorkaround().

◆ isIllegalRegisterType()

◆ isKnownNonNull()

◆ isLoadStoreLegal()

◆ isLoadStoreSizeLegal()

Definition at line 405 of file AMDGPULegalizerInfo.cpp.

References llvm::SITargetLowering::allowsMisalignedMemoryAccessesImpl(), assert(), llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT, llvm::LLT::getSizeInBits(), llvm::LLT::isVector(), maxSizeForAddrSpace(), llvm::LegalityQuery::MMODescrs, llvm::LegalityQuery::Opcode, RegSize, Size, and llvm::LegalityQuery::Types.

Referenced by isLoadStoreLegal().

◆ isNot()

◆ isRegisterClassType() [1/2]

static bool isRegisterClassType ( LLT Ty)	static

◆ isRegisterClassType() [2/2]

◆ isRegisterSize()

◆ isRegisterType() [1/2]

static bool isRegisterType ( LLT Ty)	static

◆ isRegisterType() [2/2]

◆ isRegisterVectorElementType()

static bool isRegisterVectorElementType ( LLT EltTy)	static

◆ isRegisterVectorType()

static bool isRegisterVectorType ( LLT Ty)	static

◆ isSmallOddVector()

◆ isWideScalarExtLoadTruncStore()

◆ isWideVec16()

◆ loadStoreBitcastWorkaround()

static bool loadStoreBitcastWorkaround ( const LLT Ty)	static

◆ maxSizeForAddrSpace()

◆ moreElementsToNextExistingRegClass()

◆ moreEltsToNext32Bit()

◆ needsDenormHandlingF32()

◆ numElementsNotEven()

◆ oneMoreElement()

◆ packImage16bitOpsToDwords()

Turn a set of s16 typed registers in AddrRegs into a dword sized vector with s16 typed elements.

Definition at line 6298 of file AMDGPULegalizerInfo.cpp.

References assert(), B, llvm::LLT::fixed_vector(), llvm::SrcOp::getReg(), I, Intr, MI, llvm::SmallVectorTemplateBase< T, bool >::push_back(), S16, llvm::LLT::scalar(), and V2S16.

Referenced by llvm::AMDGPULegalizerInfo::legalizeImageIntrinsic().

◆ replaceWithConstant()

◆ shouldBitcastLoadStoreType()

◆ shouldWidenLoad() [1/2]

◆ shouldWidenLoad() [2/2]

◆ sizeIsMultipleOf32()

◆ stripAnySourceMods()

◆ toggleSPDenormMode()

◆ valueIsKnownNeverF32Denorm()

◆ vectorSmallerThan()

◆ vectorWiderThan()

◆ verifyCFIntrinsic()

Definition at line 4237 of file AMDGPULegalizerInfo.cpp.

References llvm::MachineBasicBlock::end(), llvm::MachineFunction::end(), llvm::eraseInstr(), llvm::ilist_node_impl< OptionsT >::getIterator(), llvm::MachineOperand::getMBB(), llvm::MachineInstr::getOpcode(), llvm::MachineInstr::getOperand(), llvm::MachineBasicBlock::getParent(), llvm::MachineInstr::getParent(), llvm::MachineOperand::getReg(), isNot(), MI, MRI, and UseMI.

Referenced by llvm::AMDGPULegalizerInfo::legalizeIntrinsic().

◆ widenToNextPowerOf2()

static LLT widenToNextPowerOf2 ( LLT Ty)	static

◆ AllS16Vectors

std::initializer_list<LLT> AllS16Vectors	static

◆ AllS32Vectors

std::initializer_list<LLT> AllS32Vectors	static

◆ AllS64Vectors

std::initializer_list<LLT> AllS64Vectors	static

◆ AllScalarTypes

std::initializer_list<LLT> AllScalarTypes	static

◆ EnableNewLegality

cl::opt< bool > EnableNewLegality ("amdgpu-global-isel-new-legality", cl::desc("Use GlobalISel desired legality, rather than try to use" "rules compatible with selection patterns"), cl::init(false), cl::ReallyHidden)

static

◆ F32

Definition at line 286 of file AMDGPULegalizerInfo.cpp.

◆ F64

◆ FPEnvModeBitField

◆ FPEnvTrapBitField

◆ MaxRegisterSize

constexpr unsigned MaxRegisterSize = 1024	staticconstexpr

◆ MaxScalar

◆ S1

Definition at line 282 of file AMDGPULegalizerInfo.cpp.

◆ S1024

◆ S128

◆ S16

Definition at line 284 of file AMDGPULegalizerInfo.cpp.

◆ S160

◆ S192

◆ S224

◆ S256

◆ S32

Definition at line 285 of file AMDGPULegalizerInfo.cpp.

◆ S512

◆ S64

Definition at line 287 of file AMDGPULegalizerInfo.cpp.

◆ S8

◆ S96

◆ SPDenormModeBitField

◆ V10S16

const LLT V10S16 = LLT::fixed_vector(10, 16)	static

◆ V10S32

const LLT V10S32 = LLT::fixed_vector(10, 32)	static

◆ V11S32

const LLT V11S32 = LLT::fixed_vector(11, 32)	static

◆ V12S16

const LLT V12S16 = LLT::fixed_vector(12, 16)	static

◆ V12S32

const LLT V12S32 = LLT::fixed_vector(12, 32)	static

◆ V16S16

const LLT V16S16 = LLT::fixed_vector(16, 16)	static

◆ V16S32

const LLT V16S32 = LLT::fixed_vector(16, 32)	static

◆ V16S64

const LLT V16S64 = LLT::fixed_vector(16, 64)	static

◆ V2BF16

◆ V2F16

const LLT V2F16 = LLT::fixed_vector(2, LLT::float16())	static

◆ V2S128

const LLT V2S128 = LLT::fixed_vector(2, 128)	static

◆ V2S16

const LLT V2S16 = LLT::fixed_vector(2, 16)	static

◆ V2S32

const LLT V2S32 = LLT::fixed_vector(2, 32)	static

◆ V2S64

const LLT V2S64 = LLT::fixed_vector(2, 64)	static

◆ V2S8

const LLT V2S8 = LLT::fixed_vector(2, 8)	static

◆ V32S32

const LLT V32S32 = LLT::fixed_vector(32, 32)	static

◆ V3S32

const LLT V3S32 = LLT::fixed_vector(3, 32)	static

◆ V3S64

const LLT V3S64 = LLT::fixed_vector(3, 64)	static

◆ V4S128

const LLT V4S128 = LLT::fixed_vector(4, 128)	static

◆ V4S16

const LLT V4S16 = LLT::fixed_vector(4, 16)	static

◆ V4S32

const LLT V4S32 = LLT::fixed_vector(4, 32)	static

◆ V4S64

const LLT V4S64 = LLT::fixed_vector(4, 64)	static

◆ V5S32

const LLT V5S32 = LLT::fixed_vector(5, 32)	static

◆ V5S64

const LLT V5S64 = LLT::fixed_vector(5, 64)	static

◆ V6S16

const LLT V6S16 = LLT::fixed_vector(6, 16)	static

◆ V6S32

const LLT V6S32 = LLT::fixed_vector(6, 32)	static

◆ V6S64

const LLT V6S64 = LLT::fixed_vector(6, 64)	static

◆ V7S32

const LLT V7S32 = LLT::fixed_vector(7, 32)	static

◆ V7S64

const LLT V7S64 = LLT::fixed_vector(7, 64)	static

◆ V8S16

const LLT V8S16 = LLT::fixed_vector(8, 16)	static

◆ V8S32

const LLT V8S32 = LLT::fixed_vector(8, 32)	static

◆ V8S64

const LLT V8S64 = LLT::fixed_vector(8, 64)	static

◆ V9S32

const LLT V9S32 = LLT::fixed_vector(9, 32)	static