SLPVectorizer.cpp File Reference (original) (raw)

Go to the source code of this file.

Classes
class	slpvectorizer::BoUpSLP
	Bottom Up SLP Vectorizer. More...
struct	slpvectorizer::BoUpSLP::EdgeInfo
	This structure holds any data we need about the edges being traversed during buildTreeRec(). More...
class	slpvectorizer::BoUpSLP::LookAheadHeuristics
	A helper class used for scoring candidates for two consecutive lanes. More...
class	slpvectorizer::BoUpSLP::VLOperands
	A helper data structure to hold the operands of a vector of instructions. More...
struct	llvm::DenseMapInfo< BoUpSLP::EdgeInfo >
struct	llvm::GraphTraits< BoUpSLP * >
struct	llvm::GraphTraits< BoUpSLP * >::ChildIteratorType
	Add the VectorizableTree to the index iterator to be able to return TreeEntry pointers. More...
class	llvm::GraphTraits< BoUpSLP * >::nodes_iterator
	For the node iterator we just need to turn the TreeEntry iterator into a TreeEntry* iterator so that it dereferences to NodeRef. More...
struct	llvm::DOTGraphTraits< BoUpSLP * >
class	slpvectorizer::BoUpSLP::ShuffleCostEstimator
	Merges shuffle masks and emits final shuffle instruction, if required. More...
class	slpvectorizer::BoUpSLP::ShuffleInstructionBuilder
	Merges shuffle masks and emits final shuffle instruction, if required. More...

Macros
#define	SV_NAME "slp-vectorizer"
#define	DEBUG_TYPE "SLP"

Functions
	STATISTIC (NumVectorInstructions, "Number of vector instructions generated")
	DEBUG_COUNTER (VectorizedGraphs, "slp-vectorized", "Controls which SLP graphs should be vectorized.")
static bool	isValidElementType (Type *Ty)
	Predicate for the element types that the SLP vectorizer supports.
static Type *	getValueType (Value *V)
	Returns the type of the given value/instruction V.
static unsigned	getNumElements (Type *Ty)
static FixedVectorType *	getWidenedType (Type *ScalarTy, unsigned VF)
static unsigned	getFullVectorNumberOfElements (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz)
	Returns the number of elements of the given type Ty, not less than Sz, that forms a type which TTI splits into whole vector types during legalization.
static unsigned	getFloorFullVectorNumberOfElements (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz)
	Returns the number of elements of the given type Ty, not greater than Sz, that forms a type which TTI splits into whole vector types during legalization.
static void	transformScalarShuffleIndiciesToVector (unsigned VecTyNumElements, SmallVectorImpl< int > &Mask)
static unsigned	getShufflevectorNumGroups (ArrayRef< Value * > VL)
static SmallVector< int >	calculateShufflevectorMask (ArrayRef< Value * > VL)
static bool	isConstant (Value *V)
static bool	isVectorLikeInstWithConstOps (Value *V)
	Checks if V is one of vector-like instructions, i.e.
static unsigned	getPartNumElems (unsigned Size, unsigned NumParts)
	Returns power-of-2 number of elements in a single register (part), given the total number of elements Size and number of registers (parts) NumParts.
static unsigned	getNumElems (unsigned Size, unsigned PartNumElems, unsigned Part)
	Returns correct remaining number of elements, considering total amount Size, (power-of-2 number) of elements in a single register PartNumElems and current register (part) Part.
static std::string	shortBundleName (ArrayRef< Value * > VL, int Idx=-1)
	Print a short descriptor of the instruction bundle suitable for debug output.
static bool	allSameBlock (ArrayRef< Value * > VL)
static bool	allConstant (ArrayRef< Value * > VL)
static bool	isSplat (ArrayRef< Value * > VL)
static bool	isCommutative (Instruction *I, Value *ValWithUses, bool IsCopyable=false)
static bool	isCommutative (Instruction *I)
	This is a helper function to check whether I is commutative.
static unsigned	getNumberOfPotentiallyCommutativeOps (Instruction *I)
template<typename T>
static std::optional< unsigned >	getInsertExtractIndex (const Value *Inst, unsigned Offset)
static std::optional< unsigned >	getElementIndex (const Value *Inst, unsigned Offset=0)
static bool	allSameOpcode (ArrayRef< Value * > VL)
static SmallBitVector	buildUseMask (int VF, ArrayRef< int > Mask, UseMask MaskArg)
	Prepares a use bitset for the given mask either for the first argument or for the second.
template<bool IsPoisonOnly = false>
static SmallBitVector	isUndefVector (const Value *V, const SmallBitVector &UseMask={})
	Checks if the given value is actually an undefined constant vector.
static std::optional< TargetTransformInfo::ShuffleKind >	isFixedVectorShuffle (ArrayRef< Value * > VL, SmallVectorImpl< int > &Mask, AssumptionCache *AC)
	Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 Mask will return the Shuffle Mask equivalent to the extracted elements.
static std::optional< unsigned >	getExtractIndex (const Instruction *E)
static bool	areAllOperandsNonInsts (Value *V)
	Checks if the provided value does not require scheduling.
static bool	isUsedOutsideBlock (Value *V)
	Checks if the provided value does not require scheduling.
static bool	doesNotNeedToBeScheduled (Value *V)
	Checks if the specified value does not require scheduling.
static bool	isValidForAlternation (unsigned Opcode)
static InstructionsState	getSameOpcode (ArrayRef< Value * > VL, const TargetLibraryInfo &TLI)
static Instruction *	findInstructionWithOpcode (ArrayRef< Value * > VL, unsigned Opcode)
	Find an instruction with a specific opcode in VL.
static bool	areCompatibleCmpOps (Value *BaseOp0, Value *BaseOp1, Value *Op0, Value *Op1, const TargetLibraryInfo &TLI)
	Checks if the provided operands of 2 cmp instructions are compatible, i.e.
static bool	isCmpSameOrSwapped (const CmpInst *BaseCI, const CmpInst *CI, const TargetLibraryInfo &TLI)
static bool	allSameType (ArrayRef< Value * > VL)
static bool	doesInTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI)
static MemoryLocation	getLocation (Instruction *I)
static bool	isSimple (Instruction *I)
static void	addMask (SmallVectorImpl< int > &Mask, ArrayRef< int > SubMask, bool ExtendingManyInputs=false)
	Shuffles Mask in accordance with the given SubMask.
static void	fixupOrderingIndices (MutableArrayRef< unsigned > Order)
	Order may have elements assigned special value (size) which is out of bounds.
static SmallBitVector	getAltInstrMask (ArrayRef< Value * > VL, Type *ScalarTy, unsigned Opcode0, unsigned Opcode1)
static SmallVector< Constant * >	replicateMask (ArrayRef< Constant * > Val, unsigned VF)
	Replicates the given Val VF times.
static void	inversePermutation (ArrayRef< unsigned > Indices, SmallVectorImpl< int > &Mask)
static void	reorderScalars (SmallVectorImpl< Value * > &Scalars, ArrayRef< int > Mask)
	Reorders the list of scalars in accordance with the given Mask.
static bool	doesNotNeedToSchedule (ArrayRef< Value * > VL)
	Checks if the specified array of instructions does not require scheduling.
static bool	hasFullVectorsOrPowerOf2 (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz)
	Returns true if widened type of Ty elements with size Sz represents full vector type, i.e.
static unsigned	getNumberOfParts (const TargetTransformInfo &TTI, VectorType *VecTy, const unsigned Limit=std::numeric_limits< unsigned >::max())
	Returns number of parts, the type VecTy will be split at the codegen phase.
static void	reorderReuses (SmallVectorImpl< int > &Reuses, ArrayRef< int > Mask)
	Reorders the given Reuses mask according to the given Mask.
static void	reorderOrder (SmallVectorImpl< unsigned > &Order, ArrayRef< int > Mask, bool BottomOrder=false)
	Reorders the given Order according to the given Mask.
static bool	arePointersCompatible (Value *Ptr1, Value *Ptr2, const TargetLibraryInfo &TLI, bool CompareOpcodes=true)
template<typename T>
static Align	computeCommonAlignment (ArrayRef< Value * > VL)
	Calculates minimal alignment as a common alignment.
static bool	isReverseOrder (ArrayRef< unsigned > Order)
	Check if Order represents reverse order.
static const SCEV *	calculateRtStride (ArrayRef< Value * > PointerOps, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
	Checks if the provided list of pointers Pointers represents the strided pointers for type ElemTy.
static std::pair< InstructionCost, InstructionCost >	getGEPCosts (const TargetTransformInfo &TTI, ArrayRef< Value * > Ptrs, Value BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind, Type ScalarTy, VectorType *VecTy)
	Calculate the scalar and the vector costs from vectorizing set of GEPs.
static InstructionCost	getShuffleCost (const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, VectorType Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType SubTp=nullptr, ArrayRef< const Value * > Args={})
	Returns the cost of the shuffle instructions with the given Kind, vector type Tp and optional Mask.
static InstructionCost	getScalarizationOverhead (const TargetTransformInfo &TTI, Type ScalarTy, VectorType Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={})
	This is similar to TargetTransformInfo::getScalarizationOverhead, but if ScalarTy is a FixedVectorType, a vector will be inserted or extracted instead of a scalar.
static InstructionCost	getVectorInstrCost (const TargetTransformInfo &TTI, Type ScalarTy, unsigned Opcode, Type Val, TTI::TargetCostKind CostKind, unsigned Index, Value Scalar, ArrayRef< std::tuple< Value , User *, int > > ScalarUserAndIdx)
	This is similar to TargetTransformInfo::getVectorInstrCost, but if ScalarTy is a FixedVectorType, a vector will be extracted instead of a scalar.
static InstructionCost	getExtractWithExtendCost (const TargetTransformInfo &TTI, unsigned Opcode, Type Dst, VectorType VecTy, unsigned Index, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)
	This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst is a FixedVectorType, a vector will be extracted instead of a scalar.
static Value *	createInsertVector (IRBuilderBase &Builder, Value Vec, Value V, unsigned Index, function_ref< Value (Value , Value *, ArrayRef< int >)> Generator={})
	Creates subvector insert.
static Value *	createExtractVector (IRBuilderBase &Builder, Value *Vec, unsigned SubVecVF, unsigned Index)
	Generates subvector extract using Generator or using default shuffle.
static bool	buildCompressMask (ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, Type *ScalarTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< int > &CompressMask)
	Builds compress-like mask for shuffles for the given PointerOps, ordered with Order.
static bool	isMaskedLoadCompress (ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, AssumptionCache &AC, const DominatorTree &DT, const TargetLibraryInfo &TLI, const function_ref< bool(Value )> AreAllUsersVectorized, bool &IsMasked, unsigned &InterleaveFactor, SmallVectorImpl< int > &CompressMask, VectorType &LoadVecTy)
	Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load.
static bool	isMaskedLoadCompress (ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, AssumptionCache &AC, const DominatorTree &DT, const TargetLibraryInfo &TLI, const function_ref< bool(Value *)> AreAllUsersVectorized)
	Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load.
static bool	clusterSortPtrAccesses (ArrayRef< Value * > VL, ArrayRef< BasicBlock * > BBs, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
static bool	areTwoInsertFromSameBuildVector (InsertElementInst VU, InsertElementInst V, function_ref< Value (InsertElementInst )> GetBaseOperand)
	Check if two insertelement instructions are from the same buildvector.
static bool	isAlternateInstruction (Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI)
	Checks if the specified instruction I is an alternate operation for the given MainOp and AltOp instructions.
static bool	isRepeatedNonIdentityClusteredMask (ArrayRef< int > Mask, unsigned Sz)
	Checks if the given mask is a "clustered" mask with the same clusters of size Sz, which are not identity submasks.
static void	combineOrders (MutableArrayRef< unsigned > Order, ArrayRef< unsigned > SecondaryOrder)
static LLVM_DUMP_METHOD void	dumpOrder (const BoUpSLP::OrdersType &Order)
static void	gatherPossiblyVectorizableLoads (const BoUpSLP &R, ArrayRef< Value * > VL, const DataLayout &DL, ScalarEvolution &SE, const TargetTransformInfo &TTI, SmallVectorImpl< SmallVector< std::pair< LoadInst *, int64_t > > > &GatheredLoads, bool AddNew=true)
	Tries to find subvector of loads and builds new vector of only loads if can be profitable.
static std::pair< size_t, size_t >	generateKeySubkey (Value V, const TargetLibraryInfo TLI, function_ref< hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator, bool AllowAlternate)
	Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.
static bool	isMainInstruction (Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI)
	Checks if the specified instruction I is a main operation for the given MainOp and AltOp instructions.
static SmallVector< Type * >	buildIntrinsicArgTypes (const CallInst CI, const Intrinsic::ID ID, const unsigned VF, unsigned MinBW, const TargetTransformInfo TTI)
	Builds the arguments types vector for the given call instruction with the given ID for the specified vector factor.
static std::pair< InstructionCost, InstructionCost >	getVectorCallCosts (CallInst CI, FixedVectorType VecTy, TargetTransformInfo TTI, TargetLibraryInfo TLI, ArrayRef< Type * > ArgTys)
	Calculates the costs of vectorized intrinsic (if possible) and vectorized function (if possible) calls.
static std::pair< Instruction *, Instruction * >	getMainAltOpsNoStateVL (ArrayRef< Value * > VL)
	Returns main/alternate instructions for the given VL.
static bool	tryToFindDuplicates (SmallVectorImpl< Value * > &VL, SmallVectorImpl< int > &ReuseShuffleIndices, const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, const InstructionsState &S, const BoUpSLP::EdgeInfo &UserTreeIdx, bool TryPad=false)
	Checks that every instruction appears once in the list and if not, packs them, building ReuseShuffleIndices mask and mutating VL.
static InstructionCost	canConvertToFMA (ArrayRef< Value * > VL, const InstructionsState &S, DominatorTree &DT, const DataLayout &DL, TargetTransformInfo &TTI, const TargetLibraryInfo &TLI)
	Check if we can convert fadd/fsub sequence to FMAD.
static bool	isLoadCombineCandidateImpl (Value *Root, unsigned NumElts, TargetTransformInfo *TTI, bool MustMatchOrInst)
static bool	isFirstInsertElement (const InsertElementInst *IE1, const InsertElementInst *IE2)
	Checks if the IE1 instruction is followed by the IE2 instruction in the buildvector sequence.
template<typename T>
static T *	performExtractsShuffleAction (MutableArrayRef< std::pair< T , SmallVector< int > > > ShuffleMask, Value Base, function_ref< unsigned(T )> GetVF, function_ref< std::pair< T , bool >(T , ArrayRef< int >, bool)> ResizeAction, function_ref< T (ArrayRef< int >, ArrayRef< T * >)> Action)
	Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.
static Instruction *	propagateMetadata (Instruction *Inst, ArrayRef< Value * > VL)
static DebugLoc	getDebugLocFromPHI (PHINode &PN)
static RecurKind	getRdxKind (Value *V)
	Gets recurrence kind from the specified value.
static bool	checkTreeSizes (ArrayRef< std::pair< unsigned, unsigned > > Sizes, bool First)
	Checks if the quadratic mean deviation is less than 90% of the mean size.
static std::optional< unsigned >	getAggregateSize (Instruction *InsertInst)
static void	findBuildAggregateRec (Instruction LastInsertInst, TargetTransformInfo TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, unsigned OperandOffset, const BoUpSLP &R)
static bool	findBuildAggregate (Instruction LastInsertInst, TargetTransformInfo TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, const BoUpSLP &R)
	Recognize construction of vectors like ra = insertelement <4 x float> poison, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement or insertvalue instruction.
static Instruction *	getReductionInstr (const DominatorTree *DT, PHINode *P, BasicBlock *ParentBB, LoopInfo *LI)
	Try and get a reduction instruction from a phi node.
static bool	matchRdxBop (Instruction *I, Value *&V0, Value *&V1)
static Instruction *	tryGetSecondaryReductionRoot (PHINode *Phi, Instruction *Root)
	We could have an initial reduction that is not an add.
static Instruction *	getNonPhiOperand (Instruction *I, PHINode *Phi)
	Returns the first operand of I that does not match Phi.
static bool	isReductionCandidate (Instruction *I)
	Returns true if I is a candidate instruction for reduction vectorization.
template<typename T>
static bool	tryToVectorizeSequence (SmallVectorImpl< T * > &Incoming, function_ref< bool(T , T )> Comparator, function_ref< bool(ArrayRef< T * >, T )> AreCompatible, function_ref< bool(ArrayRef< T >, bool)> TryToVectorizeHelper, bool MaxVFOnly, BoUpSLP &R)
template<bool IsCompatibility>
static bool	compareCmp (Value *V, Value *V2, TargetLibraryInfo &TLI, const DominatorTree &DT)
	Compare two cmp instructions.

Variables
static cl::opt< bool >	RunSLPVectorization ("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes"))
static cl::opt< bool >	SLPReVec ("slp-revec", cl::init(false), cl::Hidden, cl::desc("Enable vectorization for wider vector utilization"))
static cl::opt< int >	SLPCostThreshold ("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number "))
static cl::opt< bool >	SLPSkipEarlyProfitabilityCheck ("slp-skip-early-profitability-check", cl::init(false), cl::Hidden, cl::desc("When true, SLP vectorizer bypasses profitability checks based on " "heuristics and makes vectorization decision via cost modeling."))
static cl::opt< bool >	ShouldVectorizeHor ("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
static cl::opt< bool >	ShouldStartVectorizeHorAtStore ("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store"))
static cl::opt< bool >	SplitAlternateInstructions ("slp-split-alternate-instructions", cl::init(true), cl::Hidden, cl::desc("Improve the code quality by splitting alternate instructions"))
static cl::opt< int >	MaxVectorRegSizeOption ("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static cl::opt< unsigned >	MaxVFOption ("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)"))
static cl::opt< int >	ScheduleRegionSizeBudget ("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block"))
	Limits the size of scheduling regions in a block.
static cl::opt< int >	MinVectorRegSizeOption ("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static cl::opt< unsigned >	RecursionMaxDepth ("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree"))
static cl::opt< unsigned >	MinTreeSize ("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable"))
static cl::opt< int >	LookAheadMaxDepth ("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores"))
static cl::opt< int >	RootLookAheadMaxDepth ("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option"))
static cl::opt< unsigned >	MinProfitableStridedLoads ("slp-min-strided-loads", cl::init(2), cl::Hidden, cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value"))
static cl::opt< unsigned >	MaxProfitableLoadStride ("slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable."))
static cl::opt< bool >	DisableTreeReorder ("slp-disable-tree-reorder", cl::init(false), cl::Hidden, cl::desc("Disable tree reordering even if it is " "profitable. Used for testing only."))
static cl::opt< bool >	ForceStridedLoads ("slp-force-strided-loads", cl::init(false), cl::Hidden, cl::desc("Generate strided loads even if they are not " "profitable. Used for testing only."))
static cl::opt< bool >	ViewSLPTree ("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz"))
static cl::opt< bool >	VectorizeNonPowerOf2 ("slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden, cl::desc("Try to vectorize with non-power-of-2 number of elements."))
static cl::opt< bool >	VectorizeCopyableElements ("slp-copyable-elements", cl::init(true), cl::Hidden, cl::desc("Try to replace values with the idempotent instructions for " "better vectorization."))
	Enables vectorization of copyable elements.
static const unsigned	AliasedCheckLimit = 10
static constexpr int	UsesLimit = 64
static const unsigned	MaxMemDepDistance = 160
static const int	MinScheduleRegionSize = 16
	If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.
static const unsigned	MaxPHINumOperands = 128
	Maximum allowed number of operands in the PHI nodes.

◆ DEBUG_TYPE

◆ SV_NAME

#define SV_NAME "slp-vectorizer"

◆ addMask()

◆ allConstant()

◆ allSameBlock()

Returns

true if all of the instructions in VL are in the same block or false otherwise.

Definition at line 483 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::cast(), llvm::dyn_cast(), llvm::ArrayRef< T >::end(), llvm::find_if(), llvm::ilist_detail::node_parent_access< NodeTy, ParentTy >::getParent(), II, llvm::isa(), llvm::IsaPred, isVectorLikeInstWithConstOps(), and llvm::iterator_range().

Referenced by slpvectorizer::BoUpSLP::isTreeNotExtendable(), and slpvectorizer::BoUpSLP::isTreeTinyAndNotFullyVectorizable().

◆ allSameOpcode()

◆ allSameType()

◆ areAllOperandsNonInsts()

bool areAllOperandsNonInsts ( Value * V)	static

◆ areCompatibleCmpOps()

◆ arePointersCompatible()

◆ areTwoInsertFromSameBuildVector()

◆ buildCompressMask()

Builds compress-like mask for shuffles for the given PointerOps, ordered with Order.

Returns

true if the mask represents strided access, false - otherwise.

Definition at line 6787 of file SLPVectorizer.cpp.

References llvm::SmallVectorImpl< T >::assign(), DL, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::front(), llvm::getPointersDiff(), I, llvm::PoisonMaskElem, llvm::seq(), and llvm::ArrayRef< T >::size().

Referenced by isMaskedLoadCompress().

◆ buildIntrinsicArgTypes()

◆ buildUseMask()

◆ calculateRtStride()

Checks if the provided list of pointers Pointers represents the strided pointers for type ElemTy.

If they are not, nullptr is returned. Otherwise, SCEV* of the stride value is returned.

Definition at line 6531 of file SLPVectorizer.cpp.

References llvm::SmallVectorImpl< T >::clear(), DL, llvm::dyn_cast(), llvm::ScalarEvolution::getAddExpr(), llvm::ScalarEvolution::getConstant(), llvm::ScalarEvolution::getMinusSCEV(), llvm::ScalarEvolution::getMulExpr(), llvm::ScalarEvolution::getSCEV(), llvm::SCEV::getType(), llvm::ScalarEvolution::getUDivExactExpr(), llvm::isa(), llvm::SCEV::isNonConstantNegative(), llvm::SCEV::isZero(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorImpl< T >::resize(), Size, llvm::ArrayRef< T >::size(), and llvm::SmallVectorTemplateCommon< T, typename >::size().

Referenced by slpvectorizer::BoUpSLP::analyzeRtStrideCandidate().

◆ calculateShufflevectorMask()

Returns

a shufflevector mask which is used to vectorize shufflevectors e.g., %5 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %6 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %7 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %8 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> the result is <0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31>

Definition at line 413 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), llvm::ArrayRef< T >::front(), getShufflevectorNumGroups(), and llvm::PoisonMaskElem.

◆ canConvertToFMA()

Check if we can convert fadd/fsub sequence to FMAD.

Returns

Cost of the FMAD, if conversion is possible, invalid cost otherwise.

Definition at line 12965 of file SLPVectorizer.cpp.

References AbstractManglingParser< Derived, Alloc >::NumOps, llvm::all_of(), llvm::FastMathFlags::allowContract(), Analysis, assert(), CostKind, DL, llvm::dyn_cast(), llvm::ArrayRef< T >::front(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::InstructionCost::getInvalid(), getSameOpcode(), I, llvm::FastMathFlags::set(), llvm::TargetTransformInfo::TCK_RecipThroughput, and llvm::zip().

◆ checkTreeSizes()

◆ clusterSortPtrAccesses()

Definition at line 7439 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::any_of(), assert(), llvm::sampleprof::Base, llvm::SmallVectorImpl< T >::clear(), llvm::SmallPtrSetImpl< PtrType >::contains(), llvm::Depth, DL, llvm::ArrayRef< T >::drop_front(), llvm::SmallVectorImpl< T >::emplace_back(), llvm::enumerate(), llvm::ArrayRef< T >::front(), llvm::getUnderlyingObject(), llvm::InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key, llvm::SmallPtrSetImpl< PtrType >::insert(), P, llvm::SmallVectorTemplateBase< T, bool >::push_back(), RecursionMaxDepth, llvm::ArrayRef< T >::size(), llvm::SmallVectorTemplateCommon< T, typename >::size(), llvm::stable_sort(), and T.

Referenced by slpvectorizer::BoUpSLP::findPartiallyOrderedLoads().

◆ combineOrders()

◆ compareCmp()

template<bool IsCompatibility>

Compare two cmp instructions.

If IsCompatibility is true, the function returns true if 2 cmps have same/swapped predicates and most compatible corresponding operands. If IsCompatibility is false, the function implements a strict weak ordering relation between two cmp instructions, returning true if the first instruction is "less" than the second, i.e. its predicate is less than the predicate of the second or the operands IDs are less than the operands IDs of the second cmp instruction.

Definition at line 26263 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), llvm::dyn_cast(), E(), llvm::DomTreeNodeBase< NodeT >::getDFSNumIn(), llvm::DominatorTreeBase< NodeT, IsPostDom >::getNode(), llvm::User::getOperand(), getSameOpcode(), llvm::Type::getScalarSizeInBits(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::Type::getTypeID(), llvm::Value::getValueID(), I, and isValidElementType().

Referenced by llvm::SLPVectorizerPass::vectorizeCmpInsts().

◆ computeCommonAlignment()

◆ createExtractVector()

◆ createInsertVector()

Creates subvector insert.

Generates shuffle using Generator or using default shuffle.

Definition at line 6747 of file SLPVectorizer.cpp.

◆ DEBUG_COUNTER()

DEBUG_COUNTER	(	VectorizedGraphs	,
"slp-vectorized"	,
"Controls which SLP graphs should be vectorized."	)

◆ doesInTreeUserNeedToExtract()

◆ doesNotNeedToBeScheduled()

bool doesNotNeedToBeScheduled ( Value * V)	static

◆ doesNotNeedToSchedule()

◆ dumpOrder()

◆ findBuildAggregate()

Recognize construction of vectors like

  ra = insertelement <4 x float> poison, float s0, i32 0
  rb = insertelement <4 x float> ra, float s1, i32 1
  rc = insertelement <4 x float> rb, float s2, i32 2
  rd = insertelement <4 x float> rc, float s3, i32 3

starting from the last insertelement or insertvalue instruction.

Also recognize homogeneous aggregates like {<2 x float>, <2 x float>}, {{float, float}, {float, float}}, [2 x {float, float}] and so on. See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.

Assume LastInsertInst is of InsertElementInst or InsertValueInst type.

Returns

true if it matches.

Definition at line 25736 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::erase(), findBuildAggregateRec(), getAggregateSize(), llvm::isa(), llvm::SmallVectorImpl< T >::resize(), and llvm::SmallVectorTemplateCommon< T, typename >::size().

◆ findBuildAggregateRec()

◆ findInstructionWithOpcode()

Find an instruction with a specific opcode in VL.

Parameters

VL	Array of values to search through. Must contain only Instructions and PoisonValues.
Opcode	The instruction opcode to search for

Returns

The first instruction found with matching opcode
nullptr if no matching instruction is found

Definition at line 1430 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), and llvm::isa().

Referenced by getSameOpcode().

◆ fixupOrderingIndices()

Order may have elements assigned special value (size) which is out of bounds.

Such indices only appear at positions which correspond to undef values (see canReuseExtract for details) and are used in order to prevent undef values from having an effect on operand ordering. The first loop below simply finds all unused indices and then the next loop nest assigns these indices to the undef value positions. As an example below, Order has two undef positions and they are assigned values 3 and 7 respectively:

  before: 6 9 5 4 9 2 1 0
  after:  6 3 5 4 7 2 1 0

Definition at line 1768 of file SLPVectorizer.cpp.

References assert(), llvm::SmallBitVector::count(), llvm::SmallBitVector::find_first(), llvm::SmallBitVector::find_next(), I, llvm::SmallBitVector::none(), llvm::SmallBitVector::reset(), llvm::SmallBitVector::set(), and llvm::ArrayRef< T >::size().

Referenced by slpvectorizer::BoUpSLP::getReorderingData(), slpvectorizer::BoUpSLP::reorderBottomToTop(), reorderOrder(), and slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ gatherPossiblyVectorizableLoads()

Tries to find a subvector of loads and builds a new vector of only loads if it can be profitable.

Definition at line 9205 of file SLPVectorizer.cpp.

References llvm::any_of(), assert(), llvm::ArrayRef< T >::begin(), llvm::bit_ceil(), llvm::SetVector< T, Vector, Set, N >::contains(), llvm::SmallPtrSetImpl< PtrType >::contains(), llvm::SmallSet< T, N, C >::contains(), llvm::Data, DL, llvm::dyn_cast(), llvm::SmallVectorImpl< T >::emplace_back(), llvm::ArrayRef< T >::empty(), llvm::SetVector< T, Vector, Set, N >::empty(), llvm::ArrayRef< T >::end(), llvm::enumerate(), llvm::find_if(), llvm::ArrayRef< T >::front(), llvm::ilist_detail::node_parent_access< NodeTy, ParentTy >::getParent(), llvm::LoadInst::getPointerOperand(), llvm::getPointersDiff(), llvm::Value::getType(), llvm::getUnderlyingObject(), getValueType(), llvm::has_single_bit(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::SmallSet< T, N, C >::insert(), llvm::SetVector< T, Vector, Set, N >::insert_range(), isValidElementType(), llvm::Offset, P, RecursionMaxDepth, llvm::seq(), llvm::ArrayRef< T >::size(), and llvm::zip().

◆ generateKeySubkey()

Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.

The keys/subkeys can be used for better sorting of the values themselves (keys) and in values subgroups (subkeys).

Definition at line 9823 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), Call, llvm::cast(), llvm::dyn_cast(), generateKeySubkey(), llvm::CmpInst::getInversePredicate(), llvm::VFDatabase::getMappings(), llvm::User::getOperand(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::getVectorIntrinsicIDForCall(), llvm::hash_combine(), llvm::hash_value(), I, llvm::InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key, llvm::isa(), llvm::Instruction::isIntDivRem(), llvm::isTriviallyVectorizable(), isUndefVector(), isValidForAlternation(), and isVectorLikeInstWithConstOps().

Referenced by generateKeySubkey().

◆ getAggregateSize()

◆ getAltInstrMask()

◆ getDebugLocFromPHI()

◆ getElementIndex()

◆ getExtractIndex()

◆ getExtractWithExtendCost()

This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst is a FixedVectorType, a vector will be extracted instead of a scalar.

Definition at line 6729 of file SLPVectorizer.cpp.

References assert(), CostKind, llvm::dyn_cast(), getShuffleCost(), getWidenedType(), llvm::TargetTransformInfo::None, llvm::TargetTransformInfo::SK_ExtractSubvector, SLPReVec, and llvm::TargetTransformInfo::TCK_RecipThroughput.

Referenced by slpvectorizer::BoUpSLP::getTreeCost().

◆ getFloorFullVectorNumberOfElements()

◆ getFullVectorNumberOfElements()

◆ getGEPCosts()

Calculate the scalar and the vector costs from vectorizing set of GEPs.

Definition at line 12721 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::cast(), CostKind, llvm::dyn_cast(), llvm::ArrayRef< T >::end(), llvm::find_if(), llvm::Value::hasOneUse(), llvm::IsaPred, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorTemplateCommon< T, typename >::size(), and llvm::TargetTransformInfo::TCC_Free.

Referenced by slpvectorizer::BoUpSLP::canVectorizeLoads(), and isMaskedLoadCompress().

◆ getInsertExtractIndex()

◆ getLocation()

◆ getMainAltOpsNoStateVL()

◆ getNonPhiOperand()

◆ getNumberOfParts()

◆ getNumberOfPotentiallyCommutativeOps()

Returns

number of operands of I, considering commutativity. Returns 2 for commutative intrinsics.

Parameters

I	The instruction to check for commutativity

Definition at line 589 of file SLPVectorizer.cpp.

References I, llvm::isa(), and isCommutative().

◆ getNumElements()

Returns

the number of elements for Ty.

Definition at line 276 of file SLPVectorizer.cpp.

References assert(), llvm::dyn_cast(), and llvm::isa().

Referenced by llvm::MachineIRBuilder::buildSplatBuildVector(), slpvectorizer::BoUpSLP::computeMinimumValueSizes(), containsUndefinedElement(), CreateGCRelocates(), expand16BitIsFinite(), expand16BitIsInf(), expand16BitIsNaN(), expand16BitIsNormal(), expandAbs(), expandCrossIntrinsic(), expandExpIntrinsic(), expandLogIntrinsic(), extractVector(), getAltInstrMask(), llvm::AArch64TTIImpl::getCastInstrCost(), llvm::BasicTTIImplBase< BasicTTIImpl >::getIndexedVectorInstrCostFromEnd(), llvm::PPCTTIImpl::getMemoryOpCost(), getNumberOfParts(), llvm::X86TTIImpl::getScalarizationOverhead(), llvm::GCNTTIImpl::getShuffleCost(), llvm::BasicTTIImplBase< BasicTTIImpl >::getTypeBasedIntrinsicInstrCost(), llvm::SystemZTTIImpl::getVectorTruncCost(), getWidenedType(), insertVector(), llvm::X86TTIImpl::isLegalMaskedExpandLoad(), llvm::X86TTIImpl::isLegalMaskedLoad(), llvm::X86TTIImpl::isLegalMaskedStore(), isValidIndirectionTable(), llvm::ShuffleVectorInst::isValidOperands(), isVectorPromotionViableForSlice(), isZero(), promoteAllocaUserToVector(), llvm::RewriteStatepointsForGC::runOnFunction(), simplifyX86pack(), llvm::InstCombinerImpl::unshuffleConstant(), and upgradeX86IntrinsicCall().

◆ getNumElems()

◆ getPartNumElems()

◆ getRdxKind()

◆ getReductionInstr()

Try and get a reduction instruction from a phi node.

Given a phi node P in a block ParentBB, consider possible reductions if they come from either ParentBB or a containing loop latch.

Returns

A candidate reduction value if possible, or nullptr if not possible.

Definition at line 25771 of file SLPVectorizer.cpp.

References llvm::cast(), llvm::DominatorTree::dominates(), llvm::dyn_cast(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), llvm::isa(), and P.

◆ getSameOpcode()

Returns

analysis of the Instructions in VL described in InstructionsState, the Opcode that we suppose the whole list could be vectorized even if its structure is diverse.

Definition at line 1481 of file SLPVectorizer.cpp.

References llvm::all_of(), allSameOpcode(), assert(), llvm::CmpInst::BAD_ICMP_PREDICATE, Call, llvm::cast(), llvm::SetVector< T, Vector, Set, N >::contains(), llvm::dyn_cast(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::ArrayRef< T >::end(), llvm::find_if(), findInstructionWithOpcode(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::CallBase::getBundleOperandsStartIndex(), llvm::CallBase::getCalledFunction(), llvm::VFDatabase::getMappings(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::getVectorIntrinsicIDForCall(), llvm::CallBase::hasOperandBundles(), I, llvm::SetVector< T, Vector, Set, N >::insert(), llvm::isa(), llvm::IsaPred, isCmpSameOrSwapped(), llvm::isTriviallyVectorizable(), isValidForAlternation(), isVectorLikeInstWithConstOps(), llvm::iterator_range(), llvm::User::op_begin(), llvm::ArrayRef< T >::size(), llvm::SetVector< T, Vector, Set, N >::size(), and llvm::SmallVectorTemplateCommon< T, typename >::size().

Referenced by areCompatibleCmpOps(), arePointersCompatible(), canConvertToFMA(), compareCmp(), slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), slpvectorizer::BoUpSLP::isTreeNotExtendable(), slpvectorizer::BoUpSLP::VLOperands::reorder(), and tryToFindDuplicates().

◆ getScalarizationOverhead()

◆ getShuffleCost()

◆ getShufflevectorNumGroups()

Returns

the number of groups of shufflevector. A group has the following features:

1. All of the values in a group are shufflevector.
2. The mask of every shufflevector is isExtractSubvectorMask.
3. The masks of all shufflevectors in the group together use all of the elements of the source.

e.g., it is 1 group (%0):

  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

it is 2 groups (%3 and %4):

  %5 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %7 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %8 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>

it is 0 groups:

  %12 = shufflevector <8 x i16> %10, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %13 = shufflevector <8 x i16> %11, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

Definition at line 361 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), llvm::all_of(), assert(), llvm::cast(), E(), llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::front(), I, llvm::IsaPred, llvm::SmallBitVector::set(), llvm::ArrayRef< T >::size(), and llvm::ArrayRef< T >::slice().

Referenced by calculateShufflevectorMask().

◆ getValueType()

Returns the type of the given value/instruction V.

If it is a store, returns the type of its value operand, for Cmp - the types of the compare operands and for insertelement - the type of the inserted operand. Otherwise, just the type of the value is returned.

Definition at line 265 of file SLPVectorizer.cpp.

References llvm::dyn_cast().

Referenced by CollectOpsToWiden(), combineArithReduction(), combineBitcastvxi1(), combineMulToPMULDQ(), combineSetCC(), combineTargetShuffle(), combineToHorizontalAddSub(), CompactSwizzlableVector(), detectPMADDUBSW(), FoldBUILD_VECTOR(), foldCONCAT_VECTORS(), gatherPossiblyVectorizableLoads(), llvm::TargetLoweringBase::getAsmOperandValueType(), llvm::Function::getFunctionType(), llvm::RISCVTTIImpl::getIntImmCostInst(), llvm::TargetLoweringBase::getMemValueType(), llvm::TargetLoweringBase::getSimpleValueType(), getTestBitOperand(), llvm::TargetLoweringBase::isExtLoad(), LLVMGlobalGetValueType(), matchPMADDWD(), matchPMADDWD_2(), narrowVectorSelect(), performCONCAT_VECTORSCombine(), llvm::SelectionDAGISel::SelectInlineAsmMemoryOperands(), tryToFindDuplicates(), and llvm::SLPVectorizerPass::vectorizeCmpInsts().

◆ getVectorCallCosts()

Calculates the costs of vectorized intrinsic (if possible) and vectorized function (if possible) calls.

Returns invalid cost for the corresponding calls, if they cannot be vectorized/will be scalarized.

Definition at line 10053 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), llvm::VFShape::get(), llvm::ElementCount::getFixed(), llvm::CallBase::getFunctionType(), llvm::InstructionCost::getInvalid(), llvm::FixedVectorType::getNumElements(), llvm::getVectorIntrinsicIDForCall(), llvm::VFDatabase::getVectorizedFunction(), IntrinsicCost, llvm::CallBase::isNoBuiltin(), and llvm::TargetTransformInfo::TCK_RecipThroughput.

◆ getVectorInstrCost()

◆ getWidenedType()

Returns

the vector type of ScalarTy based on vectorization factor.

Definition at line 285 of file SLPVectorizer.cpp.

References llvm::FixedVectorType::get(), getNumElements(), and llvm::Type::getScalarType().

◆ hasFullVectorsOrPowerOf2()

◆ inversePermutation()

◆ isAlternateInstruction()

◆ isCmpSameOrSwapped()

◆ isCommutative() [1/2]

This is a helper function to check whether I is commutative.

This is a convenience wrapper that calls the two-parameter version of isCommutative with the same instruction for both parameters. This is the common case where the instruction being checked for commutativity is the same as the instruction whose uses are analyzed for special patterns (see the two-parameter version above for details).

Parameters

I	The instruction to check for commutativity

Returns

true if the instruction is commutative, false otherwise

Definition at line 584 of file SLPVectorizer.cpp.

References I, and isCommutative().

◆ isCommutative() [2/2]

Returns

True if I is commutative, handles CmpInst and BinaryOperator. For BinaryOperator, it also checks if ValWithUses is used in specific patterns that make it effectively commutative (like equality comparisons with zero). In most cases, users should not call this function directly (since I and ValWithUses are the same). However, when analyzing interchangeable instructions, we need to use the converted opcode along with the original uses.

Parameters

I	The instruction to check for commutativity
ValWithUses	The value whose uses are analyzed for special patterns

Definition at line 541 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::dyn_cast(), llvm::Value::hasNUsesOrMore(), I, llvm::Value::uses(), and UsesLimit.

Referenced by getNumberOfPotentiallyCommutativeOps(), slpvectorizer::BoUpSLP::LookAheadHeuristics::getScoreAtLevelRec(), isCommutative(), slpvectorizer::BoUpSLP::isProfitableToReorder(), and llvm::FastISel::selectBinaryOp().

◆ isConstant()

◆ isFirstInsertElement()

◆ isFixedVectorShuffle()

Checks if the vector of instructions can be represented as a shuffle, like:

  x0 = extractelement <4 x i8> x, i32 0
  x3 = extractelement <4 x i8> x, i32 3
  y1 = extractelement <4 x i8> y, i32 1
  y2 = extractelement <4 x i8> y, i32 2
  x0x0 = mul i8 x0, x0
  x3x3 = mul i8 x3, x3
  y1y1 = mul i8 y1, y1
  y2y2 = mul i8 y2, y2
  ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0
  ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1
  ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2
  ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3
  ret <4 x i8> ins4

can be transformed into:

  %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6>
  %2 = mul <4 x i8> %1, %1
  ret <4 x i8> %2

Mask will return the Shuffle Mask equivalent to the extracted elements.

TODO: Can we split off and reuse the shuffle mask detection from ShuffleVectorInst/getShuffleCost?

Definition at line 785 of file SLPVectorizer.cpp.

References llvm::any_of(), llvm::ArrayRef< T >::begin(), llvm::cast(), llvm::dyn_cast(), E(), llvm::ArrayRef< T >::end(), llvm::find_if(), I, llvm::isa(), llvm::IsaPred, llvm::isGuaranteedNotToBePoison(), isUndefVector(), llvm::PoisonMaskElem, Select, Size, llvm::ArrayRef< T >::size(), llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::TargetTransformInfo::SK_PermuteTwoSrc, llvm::TargetTransformInfo::SK_Select, and Unknown.

◆ isLoadCombineCandidateImpl()

Definition at line 15532 of file SLPVectorizer.cpp.

References llvm::cast(), llvm::dbgs(), llvm::IntegerType::get(), llvm::Value::getContext(), llvm::isa(), LLVM_DEBUG, llvm::PatternMatch::m_APInt(), llvm::PatternMatch::m_Or(), llvm::PatternMatch::m_Shl(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APInt::urem().

Referenced by slpvectorizer::BoUpSLP::isLoadCombineCandidate(), and slpvectorizer::BoUpSLP::isLoadCombineReductionCandidate().

◆ isMainInstruction()

Checks if the specified instruction I is a main operation for the given MainOp and AltOp instructions.

Definition at line 12292 of file SLPVectorizer.cpp.

References I.

◆ isMaskedLoadCompress() [1/2]

◆ isMaskedLoadCompress() [2/2]

bool isMaskedLoadCompress ( ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo & TTI, const DataLayout & DL, ScalarEvolution & SE, AssumptionCache & AC, const DominatorTree & DT, const TargetLibraryInfo & TLI, const function_ref< bool(Value *)> AreAllUsersVectorized, bool & IsMasked, unsigned & InterleaveFactor, SmallVectorImpl< int > & CompressMask, VectorType *& LoadVecTy )

static

Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load.

Definition at line 6820 of file SLPVectorizer.cpp.

References assert(), llvm::ArrayRef< T >::back(), llvm::ArrayRef< T >::begin(), buildCompressMask(), llvm::CallingConv::C, llvm::cast(), CostKind, DL, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), llvm::enumerate(), llvm::ArrayRef< T >::front(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::APInt::getAllOnes(), getFullVectorNumberOfElements(), getGEPCosts(), llvm::getPointersDiff(), getScalarizationOverhead(), getShuffleCost(), getWidenedType(), I, inversePermutation(), llvm::isSafeToLoadUnconditionally(), llvm::PoisonMaskElem, reorderScalars(), llvm::TargetTransformInfo::RGK_FixedWidthVector, llvm::seq(), llvm::ArrayRef< T >::size(), llvm::SmallVectorTemplateCommon< T, typename >::size(), llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::SmallVectorImpl< T >::swap(), and llvm::TargetTransformInfo::TCK_RecipThroughput.

Referenced by slpvectorizer::BoUpSLP::canVectorizeLoads(), and isMaskedLoadCompress().

◆ isReductionCandidate()

◆ isRepeatedNonIdentityClusteredMask()

◆ isReverseOrder()

◆ isSimple()

Returns

True if the instruction is not a volatile or atomic load/store.

Definition at line 1720 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), I, and MI.

Referenced by combineBROADCAST_LOAD(), llvm::DOTGraphTraits< const MachineFunction * >::getNodeLabel(), llvm::DOTGraphTraits< DomTreeNode * >::getNodeLabel(), llvm::DOTGraphTraits< DOTFuncInfo * >::getNodeLabel(), llvm::DOTGraphTraits< DOTMachineFuncInfo * >::getNodeLabel(), llvm::DOTGraphTraits< MachineBlockFrequencyInfo * >::getNodeLabel(), llvm::DOTGraphTraits< RegionNode * >::getNodeLabel(), llvm::X86TargetLowering::shouldReduceLoadWidth(), tryToFoldExtOfMaskedLoad(), llvm::MachineBlockFrequencyInfo::view(), and llvm::MBFIWrapper::view().

◆ isSplat()

◆ isUndefVector()

template<bool IsPoisonOnly = false>

◆ isUsedOutsideBlock()

◆ isValidElementType()

bool isValidElementType ( Type * Ty)	static

Predicate for the element types that the SLP vectorizer supports.

The most important thing to filter here are types which are invalid in LLVM vectors. We also filter target specific types which have absolutely no meaningful vectorization path such as x86_fp80 and ppc_f128. This just avoids spending time checking the cost model and realizing that they will be inevitably scalarized.

Definition at line 253 of file SLPVectorizer.cpp.

References llvm::isa(), llvm::VectorType::isValidElementType(), and SLPReVec.

Referenced by slpvectorizer::BoUpSLP::canMapToVector(), compareCmp(), slpvectorizer::BoUpSLP::findReusedOrderedScalars(), gatherPossiblyVectorizableLoads(), getFloorFullVectorNumberOfElements(), getFullVectorNumberOfElements(), slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), hasFullVectorsOrPowerOf2(), and llvm::SLPVectorizerPass::vectorizeCmpInsts().

◆ isValidForAlternation()

◆ isVectorLikeInstWithConstOps()

bool isVectorLikeInstWithConstOps ( Value * V)	static

◆ matchRdxBop()

◆ performExtractsShuffleAction()

Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.

It tries to do it in several steps.

If the Base vector is not an undef vector, resize the very first mask to have a common VF and perform the action for 2 input vectors (including the non-undef Base). Other shuffle masks are combined with the result of the first stage and processed as a shuffle of 2 elements.
If the Base is an undef vector and there is only 1 shuffle mask, perform the action only for 1 vector with the given mask, if it is not the identity mask.
If > 2 masks are used, perform the remaining shuffle actions for 2 vectors, combining the masks properly between the steps.

Definition at line 16092 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), assert(), llvm::sampleprof::Base, buildUseMask(), E(), I, isUndefVector(), llvm::PoisonMaskElem, T, and llvm::SmallBitVector::test().

Referenced by slpvectorizer::BoUpSLP::getTreeCost().

◆ propagateMetadata()

◆ reorderOrder()

Reorders the given Order according to the given Mask.

Order - is the original order of the scalars. Procedure transforms the provided order in accordance with the given Mask. If the resulting Order is just an identity order, Order is cleared.

Definition at line 6262 of file SLPVectorizer.cpp.

References llvm::all_of(), assert(), llvm::SmallVectorImpl< T >::assign(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::SmallVectorImpl< T >::clear(), llvm::Data, llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::enumerate(), fixupOrderingIndices(), I, inversePermutation(), llvm::ShuffleVectorInst::isIdentityMask(), llvm::PoisonMaskElem, reorderReuses(), llvm::SmallVectorImpl< T >::resize(), and llvm::SmallVectorImpl< T >::swap().

Referenced by slpvectorizer::BoUpSLP::getReorderingData(), slpvectorizer::BoUpSLP::reorderBottomToTop(), and slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ reorderReuses()

Reorders the given Reuses mask according to the given Mask.

Reuses contains original mask for the scalars reused in the node. Procedure transform this mask in accordance with the given Mask.

Definition at line 6248 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), E(), llvm::SmallVectorTemplateCommon< T, typename >::end(), I, llvm::PoisonMaskElem, llvm::SmallVectorTemplateCommon< T, typename >::size(), and llvm::SmallVectorImpl< T >::swap().

Referenced by slpvectorizer::BoUpSLP::reorderBottomToTop(), and reorderOrder().

◆ reorderScalars()

Reorders the list of scalars in accordance with the given Mask.

Definition at line 1829 of file SLPVectorizer.cpp.

References assert(), E(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::PoisonValue::get(), I, llvm::PoisonMaskElem, llvm::SmallVectorTemplateCommon< T, typename >::size(), and llvm::SmallVectorImpl< T >::swap().

Referenced by slpvectorizer::BoUpSLP::ShuffleCostEstimator::adjustExtracts(), slpvectorizer::BoUpSLP::ShuffleInstructionBuilder::adjustExtracts(), isMaskedLoadCompress(), slpvectorizer::BoUpSLP::processBuildVector(), slpvectorizer::BoUpSLP::reorderBottomToTop(), and slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ replicateMask()

◆ shortBundleName()

◆ STATISTIC()

STATISTIC	(	NumVectorInstructions	,
"Number of vector instructions generated"	)

◆ transformScalarShuffleIndiciesToVector()

◆ tryGetSecondaryReductionRoot()

◆ tryToFindDuplicates()

Checks that every instruction appears once in the list and if not, packs them, building ReuseShuffleIndices mask and mutating VL.

The list of unique scalars is extended by poison values to the whole register size.

Returns

false if VL could not be uniquified, in which case VL is unchanged and ReuseShuffleIndices is empty.

Definition at line 10633 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::SmallVectorImpl< T >::append(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::SmallVectorImpl< T >::clear(), llvm::dbgs(), llvm::SmallVectorImpl< T >::emplace_back(), llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::PoisonValue::get(), getFullVectorNumberOfElements(), getSameOpcode(), getValueType(), hasFullVectorsOrPowerOf2(), llvm::isa(), llvm::IsaPred, isConstant(), LLVM_DEBUG, llvm::PoisonMaskElem, llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::size(), llvm::SmallVectorTemplateCommon< T, typename >::size(), llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::try_emplace(), slpvectorizer::BoUpSLP::EdgeInfo::UserTE, and VectorizeNonPowerOf2.

◆ tryToVectorizeSequence()

Returns the minimum number of elements that we will attempt to vectorize.

Definition at line 26144 of file SLPVectorizer.cpp.

References llvm::ArrayRef(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::cast(), Changed, llvm::SmallVectorImpl< T >::clear(), llvm::dbgs(), llvm::dyn_cast(), E(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::SmallVectorTemplateCommon< T, typename >::front(), I, llvm::isa(), LLVM_DEBUG, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorTemplateCommon< T, typename >::size(), llvm::stable_sort(), llvm::SmallVectorImpl< T >::swap(), and T.

Referenced by llvm::SLPVectorizerPass::vectorizeCmpInsts().

◆ AliasedCheckLimit

◆ DisableTreeReorder

cl::opt< bool > DisableTreeReorder("slp-disable-tree-reorder", cl::init(false), cl::Hidden, cl::desc("Disable tree reordering even if it is " "profitable. Used for testing only.")) ( "slp-disable-tree-reorder" , cl::init(false) , cl::Hidden , cl::desc("Disable tree reordering even if it is " "profitable. Used for testing only.") )	static

◆ ForceStridedLoads

cl::opt< bool > ForceStridedLoads("slp-force-strided-loads", cl::init(false), cl::Hidden, cl::desc("Generate strided loads even if they are not " "profitable. Used for testing only.")) ( "slp-force-strided-loads" , cl::init(false) , cl::Hidden , cl::desc("Generate strided loads even if they are not " "profitable. Used for testing only.") )	static

◆ LookAheadMaxDepth

cl::opt< int > LookAheadMaxDepth("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")) ( "slp-max-look-ahead-depth" , cl::init(2) , cl::Hidden , cl::desc("The maximum look-ahead depth for operand reordering scores") )	static

◆ MaxMemDepDistance

◆ MaxPHINumOperands

Maximum allowed number of operands in the PHI nodes.

Definition at line 244 of file SLPVectorizer.cpp.

◆ MaxProfitableLoadStride

cl::opt< unsigned > MaxProfitableLoadStride("slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable.")) ( "slp-max-stride" , cl::init(8) , cl::Hidden , cl::desc("The maximum stride, considered to be profitable.") )	static

◆ MaxVectorRegSizeOption

cl::opt< int > MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) ( "slp-max-reg-size" , cl::init(128) , cl::Hidden , cl::desc("Attempt to vectorize for this register size in bits") )	static

◆ MaxVFOption

cl::opt< unsigned > MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)")) ( "slp-max-vf" , cl::init(0) , cl::Hidden , cl::desc("Maximum SLP vectorization factor (0=unlimited)") )	static

◆ MinProfitableStridedLoads

cl::opt< unsigned > MinProfitableStridedLoads("slp-min-strided-loads", cl::init(2), cl::Hidden, cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value")) ( "slp-min-strided-loads" , cl::init(2) , cl::Hidden , cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value") )	static

◆ MinScheduleRegionSize

const int MinScheduleRegionSize = 16	static

If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.

Definition at line 241 of file SLPVectorizer.cpp.

◆ MinTreeSize

cl::opt< unsigned > MinTreeSize("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable")) ( "slp-min-tree-size" , cl::init(3) , cl::Hidden , cl::desc("Only vectorize small trees if they are fully vectorizable") )	static

◆ MinVectorRegSizeOption

cl::opt< int > MinVectorRegSizeOption("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) ( "slp-min-reg-size" , cl::init(128) , cl::Hidden , cl::desc("Attempt to vectorize for this register size in bits") )	static

◆ RecursionMaxDepth

cl::opt< unsigned > RecursionMaxDepth("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree")) ( "slp-recursion-max-depth" , cl::init(12) , cl::Hidden , cl::desc("Limit the recursion depth when building a vectorizable tree") )	static

◆ RootLookAheadMaxDepth

cl::opt< int > RootLookAheadMaxDepth("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option")) ( "slp-max-root-look-ahead-depth" , cl::init(2) , cl::Hidden , cl::desc("The maximum look-ahead depth for searching best rooting option") )	static

◆ RunSLPVectorization

cl::opt< bool > RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes")) ( "vectorize-slp" , cl::init(true) , cl::Hidden , cl::desc("Run the SLP vectorization passes") )	static

◆ ScheduleRegionSizeBudget

cl::opt< int > ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block")) ( "slp-schedule-budget" , cl::init(100000) , cl::Hidden , cl::desc("Limit the size of the SLP scheduling region per block") )	static

Limits the size of scheduling regions in a block.

It avoids long compile times for very large blocks where vector instructions are spread over a wide range. This limit is way higher than needed by real-world functions.

Referenced by slpvectorizer::BoUpSLP::getSpillCost().

◆ ShouldStartVectorizeHorAtStore

cl::opt< bool > ShouldStartVectorizeHorAtStore("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc( "Attempt to vectorize horizontal reductions feeding into a store")) ( "slp-vectorize-hor-store" , cl::init(false) , cl::Hidden , cl::desc( "Attempt to vectorize horizontal reductions feeding into a store") )	static

◆ ShouldVectorizeHor

cl::opt< bool > ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions")) ( "slp-vectorize-hor" , cl::init(true) , cl::Hidden , cl::desc("Attempt to vectorize horizontal reductions") )	static

◆ SLPCostThreshold

cl::opt< int > SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")) ( "slp-threshold" , cl::init(0) , cl::Hidden , cl::desc("Only vectorize if you gain more than this " "number ") )	static

◆ SLPReVec

cl::opt< bool > SLPReVec("slp-revec", cl::init(false), cl::Hidden, cl::desc("Enable vectorization for wider vector utilization")) ( "slp-revec" , cl::init(false) , cl::Hidden , cl::desc("Enable vectorization for wider vector utilization") )	static

◆ SLPSkipEarlyProfitabilityCheck

cl::opt< bool > SLPSkipEarlyProfitabilityCheck("slp-skip-early-profitability-check", cl::init(false), cl::Hidden, cl::desc("When true, SLP vectorizer bypasses profitability checks based on " "heuristics and makes vectorization decision via cost modeling.")) ( "slp-skip-early-profitability-check" , cl::init(false) , cl::Hidden , cl::desc("When true, SLP vectorizer bypasses profitability checks based on " "heuristics and makes vectorization decision via cost modeling.") )	static

◆ SplitAlternateInstructions

cl::opt< bool > SplitAlternateInstructions("slp-split-alternate-instructions", cl::init(true), cl::Hidden, cl::desc("Improve the code quality by splitting alternate instructions")) ( "slp-split-alternate-instructions" , cl::init(true) , cl::Hidden , cl::desc("Improve the code quality by splitting alternate instructions") )	static

◆ UsesLimit

◆ VectorizeCopyableElements

Enables vectorization of copyable elements.

◆ VectorizeNonPowerOf2

cl::opt< bool > VectorizeNonPowerOf2("slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden, cl::desc("Try to vectorize with non-power-of-2 number of elements.")) ( "slp-vectorize-non-power-of-2" , cl::init(false) , cl::Hidden , cl::desc("Try to vectorize with non-power-of-2 number of elements.") )	static

◆ ViewSLPTree

cl::opt< bool > ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")) ( "view-slp-tree" , cl::Hidden , cl::desc("Display the SLP trees with Graphviz") )	static