NVPTXISelLowering.cpp File Reference (original) (raw)

Go to the source code of this file.

Macros
#define	DEBUG_TYPE "nvptx-lower"

Enumerations
enum	OperandSignedness { Signed = 0 , Unsigned, Unknown }

Functions
static bool	IsPTXVectorType (MVT VT)
static std::optional< std::pair< unsigned int, MVT > >	getVectorLoweringShape (EVT VectorEVT, const NVPTXSubtarget &STI, unsigned AddressSpace)
static void	ComputePTXValueVTs (const TargetLowering &TLI, const DataLayout &DL, LLVMContext &Ctx, CallingConv::ID CallConv, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > &Offsets, uint64_t StartingOffset=0)
	ComputePTXValueVTs - For the given Type Ty, returns the set of primitive legal-ish MVTs that compose it.
static EVT	getVectorizedVT (EVT VT, unsigned N, LLVMContext &C)
static SDValue	getExtractVectorizedValue (SDValue V, unsigned I, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
template<typename T>
static SDValue	getBuildVectorizedValue (unsigned N, const SDLoc &dl, SelectionDAG &DAG, T GetElement)
static EVT	promoteScalarIntegerPTX (const EVT VT)
	promoteScalarIntegerPTX - Used to make sure that arguments/returns are suitable for passing, promoting them to a larger size if they are not.
template<typename T>
static unsigned	canMergeParamLoadStoresStartingAt (unsigned Idx, uint32_t AccessSize, const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< T > &Offsets, Align ParamAlignment)
template<typename T>
static SmallVector< unsigned, 16 >	VectorizePTXValueVTs (const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< T > &Offsets, Align ParamAlignment, bool IsVAArg=false)
static bool	shouldConvertToIndirectCall (const CallBase *CB, const GlobalAddressSDNode *Func)
static MachinePointerInfo	refinePtrAS (SDValue &Ptr, SelectionDAG &DAG, const DataLayout &DL, const TargetLowering &TL)
static ISD::NodeType	getExtOpcode (const ISD::ArgFlagsTy &Flags)
static SDValue	correctParamType (SDValue V, EVT ExpectedVT, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl)
static SDValue	getPRMT (SDValue A, SDValue B, SDValue Selector, SDLoc DL, SelectionDAG &DAG, unsigned Mode=NVPTX::PTXPrmtMode::NONE)
static SDValue	getPRMT (SDValue A, SDValue B, uint64_t Selector, SDLoc DL, SelectionDAG &DAG, unsigned Mode=NVPTX::PTXPrmtMode::NONE)
static SDValue	buildTreeReduction (const SmallVector< SDValue > &Elements, EVT EltTy, ArrayRef< std::pair< unsigned, unsigned > > Ops, const SDLoc &DL, const SDNodeFlags Flags, SelectionDAG &DAG)
	Reduces the elements using the scalar operations provided.
static ISD::NodeType	getScalarOpcodeForReduction (unsigned ReductionOpcode)
static std::optional< unsigned >	getScalar3OpcodeForReduction (unsigned ReductionOpcode)
	Get 3-input scalar reduction opcode.
static SDValue	PromoteBinOpToF32 (SDNode *N, SelectionDAG &DAG)
static SDValue	LowerVectorArith (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerTcgen05St (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerBSWAP (SDValue Op, SelectionDAG &DAG)
static unsigned	getTcgen05MMADisableOutputLane (unsigned IID)
static SDValue	LowerTcgen05MMADisableOutputLane (SDValue Op, SelectionDAG &DAG)
static std::optional< std::pair< SDValue, SDValue > >	lowerTcgen05Ld (SDNode *N, SelectionDAG &DAG, bool HasOffset=false)
static SDValue	lowerIntrinsicVoid (SDValue Op, SelectionDAG &DAG)
static SDValue	LowerClusterLaunchControlQueryCancel (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerCvtRSIntrinsics (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerPrmtIntrinsic (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerIntrinsicWChain (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerIntrinsicWOChain (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerCTLZCTPOP (SDValue Op, SelectionDAG &DAG)
static SDValue	expandFSH64 (SDValue A, SDValue B, SDValue ShiftAmount, SDLoc DL, unsigned Opcode, SelectionDAG &DAG)
static SDValue	lowerFSH (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerROT (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerFREM (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerSELECT (SDValue Op, SelectionDAG &DAG)
static SDValue	lowerMSTORE (SDValue Op, SelectionDAG &DAG)
static std::pair< MemSDNode *, uint32_t >	convertMLOADToLoadWithUsedBytesMask (MemSDNode *N, SelectionDAG &DAG)
static std::optional< std::pair< SDValue, SDValue > >	replaceLoadVector (SDNode *N, SelectionDAG &DAG, const NVPTXSubtarget &STI)
	replaceLoadVector - Convert vector loads into multi-output scalar loads.
static void	replaceLoadVector (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results, const NVPTXSubtarget &STI)
static SDValue	lowerLoadVector (SDNode *N, SelectionDAG &DAG, const NVPTXSubtarget &STI)
static SDValue	lowerLOADi1 (LoadSDNode *LD, SelectionDAG &DAG)
static SDValue	lowerSTOREVector (SDValue Op, SelectionDAG &DAG, const NVPTXSubtarget &STI)
static bool	isConstZero (const SDValue &Operand)
static SDValue	PerformADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI)
	PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue	PerformFADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static SDValue	combineUnpackingMovIntoLoad (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
	Fold unpacking movs into a load by increasing the number of return values.
static SDValue	combinePackingMovIntoStore (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned Front, unsigned Back)
	Fold packing movs into a store.
static SDValue	combineSTORE (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &STI)
static SDValue	combineLOAD (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &STI)
static SDValue	PerformADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
	PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static SDValue	PerformFADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
	PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
static unsigned	getMinMax3Opcode (unsigned MinMax2Opcode)
	Get 3-input version of a 2-input min/max opcode.
static SDValue	PerformFMinMaxCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned PTXVersion, unsigned SmVersion)
	PerformFMinMaxCombine - Combine (fmaxnum (fmaxnum a, b), c) into (fmaxnum3 a, b, c).
static SDValue	PerformREMCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static SDValue	combineMulWide (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static bool	IsMulWideOperandDemotable (SDValue Op, unsigned OptSize, OperandSignedness &S)
	IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information.
static bool	AreMulWideOperandsDemotable (SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned)
	AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information.
static SDValue	TryMULWIDECombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
	TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. mul.wide).
static bool	isConstOne (const SDValue &Operand)
static SDValue	matchMADConstOnePattern (SDValue Add)
static SDValue	combineMADConstOne (SDValue X, SDValue Add, EVT VT, SDLoc DL, TargetLowering::DAGCombinerInfo &DCI)
static SDValue	combineMulSelectConstOne (SDValue X, SDValue Select, EVT VT, SDLoc DL, TargetLowering::DAGCombinerInfo &DCI)
static SDValue	PerformMULCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI)
static SDValue	PerformMULCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
	PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue	PerformSHLCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
	PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue	PerformSETCCCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned int SmVersion)
static SDValue	PerformEXTRACTCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue	PerformVSELECTCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue	PerformBUILD_VECTORCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue	combineADDRSPACECAST (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static APInt	getPRMTSelector (const APInt &Selector, unsigned Mode)
static APInt	computePRMT (APInt A, APInt B, APInt Selector, unsigned Mode)
static SDValue	combinePRMT (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static SDValue	sinkProxyReg (SDValue R, SDValue Chain, TargetLowering::DAGCombinerInfo &DCI)
static SDValue	combineProxyReg (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static void	ReplaceBITCAST (SDNode *Node, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
static void	ReplaceINTRINSIC_W_CHAIN (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
static void	ReplaceCopyFromReg_128 (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
static void	replaceProxyReg (SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, SmallVectorImpl< SDValue > &Results)
static void	replaceAtomicSwap128 (SDNode *N, SelectionDAG &DAG, const NVPTXSubtarget &STI, SmallVectorImpl< SDValue > &Results)
static void	computeKnownBitsForPRMT (const SDValue Op, KnownBits &Known, const SelectionDAG &DAG, unsigned Depth)
static void	computeKnownBitsForLoadV (const SDValue Op, KnownBits &Known)
static std::pair< APInt, APInt >	getPRMTDemandedBits (const APInt &SelectorVal, const APInt &DemandedBits)
static SDValue	canonicalizePRMTInput (SDValue Op, SelectionDAG &DAG)
static SDValue	simplifyDemandedBitsForPRMT (SDValue PRMT, const APInt &DemandedBits, SelectionDAG &DAG, const TargetLowering &TLI, unsigned Depth)

Variables
static cl::opt< bool >	sched4reg ("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false))
static cl::opt< unsigned >	FMAContractLevelOpt ("nvptx-fma-level", cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2))
static cl::opt< NVPTX::DivPrecisionLevel >	UsePrecDivF32 ("nvptx-prec-divf32", cl::Hidden, cl::desc("NVPTX Specific: Override the precision of the lowering for f32 fdiv"), cl::values(clEnumValN(NVPTX::DivPrecisionLevel::Approx, "0", "Use div.approx"), clEnumValN(NVPTX::DivPrecisionLevel::Full, "1", "Use div.full"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754, "2", "Use IEEE Compliant F32 div.rnd if available (default)"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754_NoFTZ, "3", "Use IEEE Compliant F32 div.rnd if available, no FTZ")), cl::init(NVPTX::DivPrecisionLevel::IEEE754))
static cl::opt< bool >	UsePrecSqrtF32 ("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
static cl::opt< bool >	UseApproxLog2F32 ("nvptx-approx-log2f32", cl::desc("NVPTX Specific: whether to use lg2.approx for log2"), cl::init(false))
	Whereas CUDA's implementation (see libdevice) uses ex2.approx for exp2(), it does NOT use lg2.approx for log2, so this is disabled by default.
static cl::opt< bool >	ForceMinByValParamAlign ("nvptx-force-min-byval-param-align", cl::Hidden, cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions."), cl::init(false))

◆ DEBUG_TYPE

#define DEBUG_TYPE "nvptx-lower"

◆ OperandSignedness

◆ AreMulWideOperandsDemotable()

◆ buildTreeReduction()

Reduces the elements using the scalar operations provided.

The operations are sorted descending in number of inputs they take. The flags on the original reduction operation will be propagated to each scalar operation. Nearby elements are grouped in tree reduction, unlike the shuffle reduction used in ExpandReductions and SelectionDAG.

Definition at line 1886 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, assert(), DL, E(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::SelectionDAG::getNode(), I, OpIdx, llvm::SmallVectorTemplateBase< T, bool >::push_back(), and llvm::ArrayRef< T >::slice().

◆ canMergeParamLoadStoresStartingAt()

◆ canonicalizePRMTInput()

◆ combineADDRSPACECAST()

◆ combineLOAD()

◆ combineMADConstOne()

Definition at line 6086 of file NVPTXISelLowering.cpp.

References llvm::ISD::ADD, llvm::Add, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SelectionDAG::getNode(), matchMADConstOnePattern(), llvm::ISD::MUL, Mul, SDValue(), X, and Y.

Referenced by PerformMULCombineWithOperands().

◆ combineMulSelectConstOne()

Definition at line 6097 of file NVPTXISelLowering.cpp.

References Cond, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SelectionDAG::getNode(), isConstOne(), matchMADConstOnePattern(), llvm::ISD::MUL, SDValue(), llvm::ISD::SELECT, Select, X, and Y.

Referenced by PerformMULCombineWithOperands().

◆ combineMulWide()

Definition at line 5895 of file NVPTXISelLowering.cpp.

References llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::EVT::getSizeInBits(), llvm::isa(), llvm::ISD::MUL, N, llvm::None, RHS, SDValue(), llvm::ISD::SHL, llvm::ISD::SIGN_EXTEND, and llvm::ISD::ZERO_EXTEND.

◆ combinePackingMovIntoStore()

Fold packing movs into a store.

ex:
  v1: v2f16 = BUILD_VECTOR a:f16, b:f16
  v2: v2f16 = BUILD_VECTOR c:f16, d:f16
  StoreV2 v1, v2

...is turned into...

StoreV4 a, b, c, d

Definition at line 5661 of file NVPTXISelLowering.cpp.

References llvm::SmallVectorImpl< T >::append(), llvm::ISD::BUILD_VECTOR, llvm::cast(), llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::ISD::FP_ROUND, llvm::SelectionDAG::getMemIntrinsicNode(), llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG(), llvm::NVPTX::isPackedVectorTy(), llvm_unreachable, N, SDValue(), llvm::NVPTXISD::StoreV2, llvm::NVPTXISD::StoreV4, llvm::NVPTXISD::StoreV8, and llvm::ISD::TRUNCATE.

Referenced by combineSTORE().

◆ combinePRMT()

◆ combineProxyReg()

◆ combineSTORE()

◆ combineUnpackingMovIntoLoad()

Fold unpacking movs into a load by increasing the number of return values.

ex:
  L: v2f16,ch = load
  a: f16 = extractelt L:0, 0
  b: f16 = extractelt L:0, 1
  use(a, b)

...is turned into...

L: f16,f16,ch = LoadV2

use(L:0, L:1)

Definition at line 5547 of file NVPTXISelLowering.cpp.

References llvm::all_of(), llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG(), llvm::NVPTX::isPackedVectorTy(), N, and SDValue().

Referenced by combineLOAD().

◆ computeKnownBitsForLoadV()

◆ computeKnownBitsForPRMT()

Definition at line 7051 of file NVPTXISelLowering.cpp.

References A(), llvm::KnownBits::ashr(), assert(), B(), llvm::SelectionDAG::computeKnownBits(), llvm::KnownBits::concat(), llvm::Depth, llvm::dyn_cast(), llvm::APInt::extractBits(), llvm::ConstantSDNode::getAPIntValue(), llvm::KnownBits::getBitWidth(), llvm::APInt::getHiBits(), llvm::APInt::getLoBits(), getPRMTSelector(), llvm::APInt::getZExtValue(), I, llvm::KnownBits::insertBits(), Mode, and llvm::seq().

Referenced by llvm::NVPTXTargetLowering::computeKnownBitsForTargetNode().

◆ computePRMT()

Definition at line 6407 of file NVPTXISelLowering.cpp.

References A(), assert(), B(), llvm::APInt::extractBits(), llvm::APInt::getBitWidth(), llvm::APInt::getHiBits(), llvm::APInt::getLoBits(), getPRMTSelector(), llvm::APInt::getZExtValue(), I, Mode, and llvm::seq().

Referenced by combinePRMT().

◆ ComputePTXValueVTs()

ComputePTXValueVTs - For the given Type Ty, returns the set of primitive legal-ish MVTs that compose it.

Unlike ComputeValueVTs, this will legalize the types as required by the calling convention (with special handling for i8s). NOTE: This is a band-aid for code that expects ComputeValueVTs to return the same number of types as the Ins/Outs arrays in LowerFormalArguments, LowerCall, and LowerReturn.

Definition at line 302 of file NVPTXISelLowering.cpp.

References assert(), llvm::ComputeValueVTs(), DL, llvm::TargetLoweringBase::getNumRegistersForCallingConv(), llvm::TargetLoweringBase::getRegisterTypeForCallingConv(), llvm::MVT::getStoreSize(), I, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::seq(), and llvm::zip().

Referenced by llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().

◆ convertMLOADToLoadWithUsedBytesMask()

Definition at line 3456 of file NVPTXISelLowering.cpp.

References assert(), llvm::cast(), DL, llvm::SelectionDAG::getLoad(), llvm::SDValue::getNode(), llvm::SDValue::getOpcode(), llvm::EVT::getSizeInBits(), llvm::EVT::getVectorElementType(), llvm::EVT::isVector(), N, llvm::ISD::POISON, llvm::reverse(), and llvm::ISD::UNDEF.

Referenced by replaceLoadVector().

◆ correctParamType()

◆ expandFSH64()

Definition at line 3020 of file NVPTXISelLowering.cpp.

References A(), assert(), B(), DL, llvm::dyn_cast(), llvm::ISD::FSHL, llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::SDValue::getValue(), High, llvm::Low, SDValue(), and llvm::NVPTXISD::UNPACK_VECTOR.

Referenced by lowerFSH(), and lowerROT().

◆ getBuildVectorizedValue()

Definition at line 368 of file NVPTXISelLowering.cpp.

References llvm::SelectionDAG::ExtractVectorElements(), llvm::SelectionDAG::getBuildVector(), llvm::SelectionDAG::getContext(), llvm::SDValue::getValueType(), llvm::EVT::getVectorVT(), I, llvm::EVT::isVector(), N, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::seq(), llvm::SmallVectorTemplateCommon< T, typename >::size(), and T.

Referenced by llvm::NVPTXTargetLowering::LowerCall(), and llvm::NVPTXTargetLowering::LowerReturn().

◆ getExtOpcode()

◆ getExtractVectorizedValue()

◆ getMinMax3Opcode()

◆ getPRMT() [1/2]

Definition at line 1866 of file NVPTXISelLowering.cpp.

References A(), assert(), B(), DL, llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::SDValue::getValueType(), Mode, and llvm::NVPTX::PTXPrmtMode::NONE.

Referenced by getPRMT(), lowerBSWAP(), lowerPrmtIntrinsic(), PerformBUILD_VECTORCombine(), and simplifyDemandedBitsForPRMT().

◆ getPRMT() [2/2]

◆ getPRMTDemandedBits()

◆ getPRMTSelector()

Definition at line 6374 of file NVPTXISelLowering.cpp.

References assert(), llvm::NVPTX::PTXPrmtMode::B4E, llvm::NVPTX::PTXPrmtMode::ECL, llvm::NVPTX::PTXPrmtMode::ECR, llvm::NVPTX::PTXPrmtMode::F4E, llvm::APInt::getBitWidth(), llvm::APInt::getZExtValue(), llvm_unreachable, Mode, llvm::NVPTX::PTXPrmtMode::NONE, llvm::NVPTX::PTXPrmtMode::RC16, llvm::NVPTX::PTXPrmtMode::RC8, S1, and llvm::APInt::trunc().

Referenced by computeKnownBitsForPRMT(), computePRMT(), and simplifyDemandedBitsForPRMT().

◆ getScalar3OpcodeForReduction()

std::optional< unsigned > getScalar3OpcodeForReduction ( unsigned ReductionOpcode)	static

◆ getScalarOpcodeForReduction()

◆ getTcgen05MMADisableOutputLane()

◆ getVectorizedVT()

◆ getVectorLoweringShape()

Definition at line 200 of file NVPTXISelLowering.cpp.

References llvm::EVT::getSimpleVT(), llvm::EVT::getSizeInBits(), llvm::MVT::getSizeInBits(), llvm::MVT::getVectorElementType(), llvm::MVT::getVectorNumElements(), llvm::MVT::getVectorVT(), llvm::NVPTXSubtarget::has256BitVectorLoadStore(), llvm::NVPTXSubtarget::hasF32x2Instructions(), llvm::EVT::isScalarInteger(), llvm::EVT::isSimple(), llvm::MVT::isVector(), and llvm::MVT::SimpleTy.

Referenced by lowerSTOREVector(), and replaceLoadVector().

◆ isConstOne()

◆ isConstZero()

◆ IsMulWideOperandDemotable()

◆ IsPTXVectorType()

bool IsPTXVectorType ( MVT VT)	static

◆ lowerBSWAP()

Definition at line 2577 of file NVPTXISelLowering.cpp.

References llvm::ISD::ANY_EXTEND, DL, llvm::SelectionDAG::getBitcast(), llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), getPRMT(), llvm::EVT::getSimpleVT(), llvm::SDValue::getValue(), llvm_unreachable, llvm::MVT::SimpleTy, llvm::ISD::TRUNCATE, and llvm::NVPTXISD::UNPACK_VECTOR.

Referenced by llvm::NVPTXTargetLowering::LowerOperation().

◆ LowerClusterLaunchControlQueryCancel()

◆ lowerCTLZCTPOP()

◆ lowerCvtRSIntrinsics()

◆ lowerFREM()

Definition at line 3075 of file NVPTXISelLowering.cpp.

References llvm::SDNodeFlags::AllowContract, DL, llvm::ISD::FDIV, llvm::ISD::FMUL, llvm::ISD::FSUB, llvm::SelectionDAG::getConstantFP(), llvm::APFloat::getInf(), llvm::SelectionDAG::getNode(), llvm::SelectionDAG::getSelect(), llvm::SelectionDAG::getSetCC(), Mul, llvm::ISD::SETEQ, llvm::Sub, X, and Y.

Referenced by llvm::NVPTXTargetLowering::LowerOperation().

◆ lowerFSH()

◆ lowerIntrinsicVoid()

◆ lowerIntrinsicWChain()

◆ lowerIntrinsicWOChain()

◆ lowerLOADi1()

◆ lowerLoadVector()

◆ lowerMSTORE()

Definition at line 3137 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, assert(), llvm::ISD::BUILD_VECTOR, llvm::cast(), DL, llvm::enumerate(), llvm::ISD::EXTRACT_VECTOR_ELT, llvm::MemSDNode::getAlign(), llvm::SelectionDAG::getEVTAlign(), llvm::SelectionDAG::getIntPtrConstant(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::MemSDNode::getMemoryVT(), llvm::SelectionDAG::getNode(), llvm::SelectionDAG::getRegister(), llvm::EVT::getSimpleVT(), llvm::SDValue::getValueType(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::SelectionDAG::getVTList(), I, llvm::EVT::isVector(), llvm_unreachable, N, llvm::MCRegister::NoRegister, llvm::Offset, llvm::MVT::SimpleTy, llvm::NVPTXISD::StoreV4, llvm::NVPTXISD::StoreV8, and llvm::ISD::UNDEF.

Referenced by llvm::NVPTXTargetLowering::LowerOperation().

◆ lowerPrmtIntrinsic()

Definition at line 2924 of file NVPTXISelLowering.cpp.

References A(), B(), llvm::NVPTX::PTXPrmtMode::B4E, DL, llvm::NVPTX::PTXPrmtMode::ECL, llvm::NVPTX::PTXPrmtMode::ECR, llvm::NVPTX::PTXPrmtMode::F4E, llvm::SelectionDAG::getConstant(), getPRMT(), llvm_unreachable, Mode, llvm::NVPTX::PTXPrmtMode::NONE, llvm::NVPTX::PTXPrmtMode::RC16, and llvm::NVPTX::PTXPrmtMode::RC8.

Referenced by lowerIntrinsicWOChain().

◆ lowerROT()

◆ lowerSELECT()

Definition at line 3103 of file NVPTXISelLowering.cpp.

References llvm::ISD::AND, assert(), Cond, DL, llvm::SelectionDAG::getAnyExtOrTrunc(), llvm::SelectionDAG::getFreeze(), llvm::SelectionDAG::getNode(), llvm::SelectionDAG::getNOT(), llvm::SelectionDAG::getSelect(), llvm::ISD::OR, Select, and llvm::ISD::TRUNCATE.

Referenced by llvm::NVPTXTargetLowering::LowerOperation().

◆ lowerSTOREVector()

Definition at line 3694 of file NVPTXISelLowering.cpp.

Referenced by combineSTORE().

◆ lowerTcgen05Ld()

Definition at line 2706 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, llvm::ISD::BUILD_VECTOR, llvm::cast(), DL, llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::MemSDNode::getMemoryVT(), llvm::SelectionDAG::getNode(), llvm::SDValue::getValue(), llvm::EVT::getVectorNumElements(), llvm::SelectionDAG::getVTList(), llvm::ISD::INTRINSIC_W_CHAIN, llvm::EVT::isVector(), N, and llvm::SmallVectorTemplateBase< T, bool >::push_back().

Referenced by lowerIntrinsicWChain(), and ReplaceINTRINSIC_W_CHAIN().

◆ LowerTcgen05MMADisableOutputLane()

Definition at line 2675 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, llvm::cast(), DL, llvm::ISD::EXTRACT_VECTOR_ELT, llvm::SelectionDAG::getIntPtrConstant(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::MemSDNode::getMemoryVT(), llvm::SelectionDAG::getNode(), getTcgen05MMADisableOutputLane(), llvm::SDValue::getValueType(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), I, llvm::EVT::isVector(), and N.

Referenced by lowerIntrinsicVoid().

◆ lowerTcgen05St()

Definition at line 2551 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, llvm::cast(), DL, llvm::ISD::EXTRACT_VECTOR_ELT, llvm::SelectionDAG::getIntPtrConstant(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::MemSDNode::getMemoryVT(), llvm::SelectionDAG::getNode(), llvm::SDValue::getValueType(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), I, llvm::ISD::INTRINSIC_VOID, llvm::EVT::isVector(), and N.

Referenced by lowerIntrinsicVoid().

◆ LowerVectorArith()

◆ matchMADConstOnePattern()

◆ PerformADDCombine()

◆ PerformADDCombineWithOperands()

PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.

This is a helper for PerformADDCombine that is called with the default operands, and if that fails, with commuted operands.

Definition at line 5416 of file NVPTXISelLowering.cpp.

References llvm::ISD::ADD, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::SDValue::getOpcode(), llvm::SDNode::getOperand(), llvm::SelectionDAG::getSelect(), llvm::SDValue::getValueType(), llvm::SDNode::hasOneUse(), isConstZero(), llvm::ISD::MUL, Mul, N, SDValue(), and llvm::ISD::SELECT.

◆ PerformBUILD_VECTORCombine()

Definition at line 6294 of file NVPTXISelLowering.cpp.

References assert(), llvm::cast(), llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SelectionDAG::getBitcast(), getPRMT(), llvm::EVT::getVectorNumElements(), llvm::EVT::is32BitVector(), llvm::isa(), llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG(), llvm::NVPTX::isPackedVectorTy(), N, SDValue(), llvm::ISD::SRL, and llvm::ISD::TRUNCATE.

◆ PerformEXTRACTCombine()

Definition at line 6209 of file NVPTXISelLowering.cpp.

References llvm::ISD::allOperandsUndef(), llvm::ISD::ANY_EXTEND, llvm::EVT::changeTypeToInteger(), llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::dyn_cast(), llvm::ISD::FREEZE, llvm::SelectionDAG::getConstant(), llvm::MVT::getIntegerVT(), llvm::SelectionDAG::getNode(), llvm::EVT::getScalarSizeInBits(), llvm::EVT::getSimpleVT(), llvm::EVT::getSizeInBits(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::NVPTX::isPackedVectorTy(), IsPTXVectorType(), llvm::EVT::isSimple(), N, SDValue(), llvm::ISD::SRA, llvm::ISD::TRUNCATE, and Vector.

◆ PerformFADDCombine()

◆ PerformFADDCombineWithOperands()

Definition at line 5456 of file NVPTXISelLowering.cpp.

References llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::ISD::FADD, llvm::ISD::FMA, llvm::ISD::FMUL, llvm::SDNode::getFlags(), llvm::SDNode::getIROrder(), llvm::SelectionDAG::getMachineFunction(), llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::SDValue::getOpcode(), llvm::SDValue::getOperand(), llvm::SelectionDAG::getTargetLoweringInfo(), llvm::SDValue::getValueType(), llvm::SDNodeFlags::hasAllowContract(), llvm::isa(), N, SDValue(), and llvm::SDNode::users().

Referenced by PerformFADDCombine().

◆ PerformFMinMaxCombine()

PerformFMinMaxCombine - Combine (fmaxnum (fmaxnum a, b), c) into (fmaxnum3 a, b, c).

Also covers other llvm min/max intrinsics.

Definition at line 5833 of file NVPTXISelLowering.cpp.

References A(), B(), llvm::CallingConv::C, llvm::TargetLowering::DAGCombinerInfo::DAG, getMinMax3Opcode(), llvm::SelectionDAG::getNode(), llvm::SDValue::getOpcode(), llvm::SDValue::getOperand(), llvm::SDValue::hasOneUse(), N, and SDValue().

◆ PerformMULCombine()

◆ PerformMULCombineWithOperands()

◆ PerformREMCombine()

Definition at line 5863 of file NVPTXISelLowering.cpp.

References assert(), llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::Default, DL, llvm::SelectionDAG::getNode(), llvm::SDNode::getOperand(), llvm::ISD::MUL, N, llvm::ISD::SDIV, SDValue(), llvm::ISD::SREM, llvm::ISD::SUB, llvm::ISD::UDIV, llvm::ISD::UREM, and llvm::SDNode::users().

◆ PerformSETCCCombine()

Definition at line 6182 of file NVPTXISelLowering.cpp.

References A(), B(), llvm::ISD::BUILD_VECTOR, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SelectionDAG::getNode(), llvm::SDValue::getValue(), llvm::SelectionDAG::getVTList(), N, SDValue(), llvm::NVPTXISD::SETP_BF16X2, and llvm::NVPTXISD::SETP_F16X2.

◆ PerformSHLCombine()

◆ PerformVSELECTCombine()

Definition at line 6261 of file NVPTXISelLowering.cpp.

References llvm::ISD::BUILD_VECTOR, llvm::CallingConv::C, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, E(), llvm::ISD::EXTRACT_VECTOR_ELT, llvm::SelectionDAG::getAnyExtOrTrunc(), llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::SDValue::getValueType(), I, N, SDValue(), and llvm::ISD::SELECT.

◆ PromoteBinOpToF32()

◆ promoteScalarIntegerPTX()

◆ refinePtrAS()

◆ replaceAtomicSwap128()

Definition at line 6792 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, assert(), llvm::NVPTXISD::ATOMIC_CMP_SWAP_B128, llvm::NVPTXISD::ATOMIC_SWAP_B128, llvm::ISD::BUILD_PAIR, llvm::cast(), llvm::LLVMContext::diagnose(), llvm::ISD::EXTRACT_ELEMENT, llvm::SelectionDAG::getContext(), llvm::SDLoc::getDebugLoc(), llvm::MachineFunction::getFunction(), llvm::SelectionDAG::getIntPtrConstant(), llvm::SelectionDAG::getMachineFunction(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::SelectionDAG::getNode(), llvm::SDNode::getOperand(), llvm::SelectionDAG::getUNDEF(), llvm::SelectionDAG::getVTList(), llvm::NVPTXSubtarget::hasAtomSwap128(), N, llvm::SDNode::ops(), and Results.

◆ ReplaceBITCAST()

◆ ReplaceCopyFromReg_128()

◆ ReplaceINTRINSIC_W_CHAIN()

Definition at line 6600 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, llvm::SmallVectorImpl< T >::append(), assert(), llvm::cast(), DL, llvm::SDNode::getAsZExtVal(), llvm::SelectionDAG::getBuildVector(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::MemSDNode::getMemoryVT(), llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::EVT::getSimpleVT(), llvm::EVT::getSizeInBits(), llvm::SDValue::getValue(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::SelectionDAG::getVTList(), llvm::ISD::INTRINSIC_W_CHAIN, llvm::EVT::isSimple(), llvm::EVT::isVector(), llvm::NVPTXISD::LDUV2, llvm::NVPTXISD::LDUV4, lowerTcgen05Ld(), N, llvm::SmallVectorTemplateBase< T, bool >::push_back(), Results, llvm::MVT::SimpleTy, and llvm::ISD::TRUNCATE.

◆ replaceLoadVector() [1/2]

replaceLoadVector - Convert vector loads into multi-output scalar loads.

Definition at line 3503 of file NVPTXISelLowering.cpp.

Referenced by lowerLoadVector(), and replaceLoadVector().

◆ replaceLoadVector() [2/2]

◆ replaceProxyReg()

◆ shouldConvertToIndirectCall()

◆ simplifyDemandedBitsForPRMT()

Definition at line 7153 of file NVPTXISelLowering.cpp.

References assert(), canonicalizePRMTInput(), llvm::Depth, llvm::dyn_cast(), llvm::SDValue::getConstantOperandVal(), llvm::APInt::getLoBits(), llvm::SDValue::getOpcode(), llvm::SDValue::getOperand(), getPRMT(), getPRMTDemandedBits(), getPRMTSelector(), llvm::APInt::getZExtValue(), Mode, SDValue(), and llvm::TargetLowering::SimplifyMultipleUseDemandedBits().

Referenced by llvm::NVPTXTargetLowering::SimplifyDemandedBitsForTargetNode().

◆ sinkProxyReg()

Definition at line 6452 of file NVPTXISelLowering.cpp.

References A(), AbstractManglingParser< Derived, Alloc >::Ops, llvm::ISD::ANY_EXTEND, B(), llvm::ISD::BUILD_VECTOR, llvm::ISD::Constant, llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::ISD::EXTRACT_VECTOR_ELT, llvm::SelectionDAG::getNode(), llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize(), llvm::NVPTXISD::LoadV2, llvm::NVPTXISD::LoadV4, llvm::ISD::OR, SDValue(), llvm::ISD::SHL, llvm::ISD::SIGN_EXTEND, sinkProxyReg(), llvm::ISD::SRA, llvm::ISD::SRL, llvm::ISD::TRUNCATE, and llvm::ISD::ZERO_EXTEND.

Referenced by combineProxyReg(), and sinkProxyReg().

◆ TryMULWIDECombine()

TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. mul.wide).

This transform works on both multiply DAG nodes and SHL DAG nodes with a constant shift amount.

Definition at line 6002 of file NVPTXISelLowering.cpp.

References AreMulWideOperandsDemotable(), llvm::BitWidth, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::dyn_cast(), llvm::ConstantSDNode::getAPIntValue(), llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::EVT::getSizeInBits(), llvm::isa(), LHS, llvm::ISD::MUL, N, Opc, RHS, SDValue(), llvm::APInt::sge(), llvm::ISD::SHL, Signed, llvm::APInt::slt(), std::swap(), and llvm::ISD::TRUNCATE.

Referenced by PerformMULCombine(), and PerformSHLCombine().

◆ VectorizePTXValueVTs()

◆ FMAContractLevelOpt

cl::opt< unsigned > FMAContractLevelOpt("nvptx-fma-level", cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2)) ( "nvptx-fma-level" , cl::Hidden )	static

◆ ForceMinByValParamAlign

cl::opt< bool > ForceMinByValParamAlign("nvptx-force-min-byval-param-align", cl::Hidden, cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions."), cl::init(false)) ( "nvptx-force-min-byval-param-align" , cl::Hidden , cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions.") , cl::init(false) )	static

◆ sched4reg

cl::opt< bool > sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)) ( "nvptx-sched4reg" , cl::desc("NVPTX Specific: schedule for register pressue") , cl::init(false) )	static

◆ UseApproxLog2F32

cl::opt< bool > UseApproxLog2F32("nvptx-approx-log2f32", cl::desc("NVPTX Specific: whether to use lg2.approx for log2"), cl::init(false)) ( "nvptx-approx-log2f32" , cl::desc("NVPTX Specific: whether to use lg2.approx for log2") , cl::init(false) )	static

◆ UsePrecDivF32

cl::opt< NVPTX::DivPrecisionLevel > UsePrecDivF32("nvptx-prec-divf32", cl::Hidden, cl::desc( "NVPTX Specific: Override the precision of the lowering for f32 fdiv"), cl::values( clEnumValN(NVPTX::DivPrecisionLevel::Approx, "0", "Use div.approx"), clEnumValN(NVPTX::DivPrecisionLevel::Full, "1", "Use div.full"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754, "2", "Use IEEE Compliant F32 div.rnd if available (default)"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754_NoFTZ, "3", "Use IEEE Compliant F32 div.rnd if available, no FTZ")), cl::init(NVPTX::DivPrecisionLevel::IEEE754)) ( "nvptx-prec-divf32" , cl::Hidden , cl::desc( "NVPTX Specific: Override the precision of the lowering for f32 fdiv") , cl::values( clEnumValN(NVPTX::DivPrecisionLevel::Approx, "0", "Use div.approx"), clEnumValN(NVPTX::DivPrecisionLevel::Full, "1", "Use div.full"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754, "2", "Use IEEE Compliant F32 div.rnd if available (default)"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754_NoFTZ, "3", "Use IEEE Compliant F32 div.rnd if available, no FTZ")) , cl::init(NVPTX::DivPrecisionLevel::IEEE754) )

static

◆ UsePrecSqrtF32

cl::opt< bool > UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true)) ( "nvptx-prec-sqrtf32" , cl::Hidden , cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn.") , cl::init(true) )	static