LoopVectorize.cpp File Reference (original) (raw)

Go to the source code of this file.

Classes
class	llvm::InnerLoopVectorizer
	InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization factor (VF). More...
struct	llvm::EpilogueLoopVectorizationInfo
	Encapsulate information regarding vectorization of a loop and its epilogue. More...
class	llvm::InnerLoopAndEpilogueVectorizer
	An extension of the inner loop vectorizer that creates a skeleton for a vectorized loop that has its epilogue (residual) also vectorized. More...
class	llvm::EpilogueVectorizerMainLoop
	A specialized derived class of inner loop vectorizer that performs vectorization of main loops in the process of vectorizing loops and their epilogues. More...
class	llvm::EpilogueVectorizerEpilogueLoop
class	llvm::LoopVectorizationCostModel
	LoopVectorizationCostModel - estimates the expected speedups due to vectorization. More...
struct	llvm::LoopVectorizationCostModel::CallWideningDecision

Namespaces
namespace	PreferPredicateTy
namespace	llvm
	This is an optimization pass for GlobalISel generic memory operations.

Macros
#define	LV_NAME "loop-vectorize"
#define	DEBUG_TYPE LV_NAME

Functions
	STATISTIC (LoopsVectorized, "Number of loops vectorized")
	STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization")
	STATISTIC (LoopsEpilogueVectorized, "Number of epilogues vectorized")
	STATISTIC (LoopsEarlyExitVectorized, "Number of early exit loops vectorized")
static bool	hasIrregularType (Type *Ty, const DataLayout &DL)
	A helper function that returns true if the given type is irregular.
static ElementCount	getSmallConstantTripCount (ScalarEvolution *SE, const Loop *L)
	A version of ScalarEvolution::getSmallConstantTripCount that returns an ElementCount to include loops whose trip count is a function of vscale.
static std::optional< ElementCount >	getSmallBestKnownTC (PredicatedScalarEvolution &PSE, Loop *L, bool CanUseConstantMax=true)
	Returns "best known" trip count, which is either a valid positive trip count or std::nullopt when an estimate cannot be made (including when the trip count would overflow), for the specified loop L as defined by the following procedure: 1) Returns exact trip count if it is known.
static DebugLoc	getDebugLocFromInstOrOperands (Instruction *I)
	Look for a meaningful debug location on the instruction or its operands.
static void	debugVectorizationMessage (const StringRef Prefix, const StringRef DebugMsg, Instruction *I)
	Write a DebugMsg about vectorization to the debug output stream.
static OptimizationRemarkAnalysis	createLVAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I, DebugLoc DL={})
	Create an analysis remark that explains why vectorization failed.
Value *	llvm::createStepForVF (IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
	Return a value for Step multiplied by VF.
Value *	llvm::getRuntimeVF (IRBuilderBase &B, Type *Ty, ElementCount VF)
	Return the runtime value for VF.
LLVM_ABI void	llvm::reportVectorizationFailure (const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
	Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding optimization remark RemarkName.
static void	llvm::reportVectorizationInfo (const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
	Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
static void	llvm::reportVectorization (OptimizationRemarkEmitter *ORE, Loop *TheLoop, VectorizationFactor VF, unsigned IC)
	Report successful vectorization of the loop.
static bool	useActiveLaneMask (TailFoldingStyle Style)
static bool	useActiveLaneMaskForControlFlow (TailFoldingStyle Style)
static bool	isExplicitVecOuterLoop (Loop *OuterLp, OptimizationRemarkEmitter *ORE)
static void	collectSupportedLoops (Loop &L, LoopInfo *LI, OptimizationRemarkEmitter *ORE, SmallVectorImpl< Loop * > &V)
static Value *	emitTransformedIndex (IRBuilderBase &B, Value *Index, Value *StartValue, Value *Step, InductionDescriptor::InductionKind InductionKind, const BinaryOperator *InductionBinOp)
	Compute the transformed value of Index at offset StartValue using step StepValue.
static std::optional< unsigned >	getMaxVScale (const Function &F, const TargetTransformInfo &TTI)
static bool	isIndvarOverflowCheckKnownFalse (const LoopVectorizationCostModel *Cost, ElementCount VF, std::optional< unsigned > UF=std::nullopt)
	For the given VF and UF and maximum trip count computed for the loop, return whether the induction variable might overflow in the vectorized loop.
static bool	useMaskedInterleavedAccesses (const TargetTransformInfo &TTI)
static VPIRBasicBlock *	replaceVPBBWithIRVPBB (VPBasicBlock *VPBB, BasicBlock *IRBB, VPlan *Plan=nullptr)
	Replace VPBB with a VPIRBasicBlock wrapping IRBB.
static Value *	getExpandedStep (const InductionDescriptor &ID, const SCEV2ValueTy &ExpandedSCEVs)
	Return the expanded step for ID using ExpandedSCEVs to look up SCEV expansion results.
static void	addFullyUnrolledInstructionsToIgnore (Loop *L, const LoopVectorizationLegality::InductionList &IL, SmallPtrSetImpl< Instruction * > &InstsToIgnore)
	Knowing that loop L executes a single vector iteration, add instructions that will get simplified and thus should not have any cost to InstsToIgnore.
static void	legacyCSE (BasicBlock *BB)
	FIXME: This legacy common-subexpression-elimination routine is scheduled for removal, in favor of the VPlan-based one.
static unsigned	estimateElementCount (ElementCount VF, std::optional< unsigned > VScale)
	This function attempts to return a value that represents the ElementCount at runtime.
static Type *	maybeVectorizeType (Type *Ty, ElementCount VF)
static bool	willGenerateVectors (VPlan &Plan, ElementCount VF, const TargetTransformInfo &TTI)
	Check if any recipe of Plan will generate a vector value, which will be assigned a vector register.
static bool	hasReplicatorRegion (VPlan &Plan)
static const SCEV *	getAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop)
	Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence.
static ElementCount	determineVPlanVF (const TargetTransformInfo &TTI, LoopVectorizationCostModel &CM)
static bool	planContainsAdditionalSimplifications (VPlan &Plan, VPCostContext &CostCtx, Loop *TheLoop, ElementCount VF)
	Return true if the original loop TheLoop contains any instructions that do not have corresponding recipes in Plan and are not marked to be ignored in CostCtx.
static Value *	getStartValueFromReductionResult (VPInstruction *RdxResult)
static void	fixReductionScalarResumeWhenVectorizingEpilog (VPPhi *EpiResumePhiR, PHINode &EpiResumePhi, BasicBlock *BypassBlock)
static VPWidenIntOrFpInductionRecipe *	createWidenInductionRecipes (VPInstruction *PhiR, const InductionDescriptor &IndDesc, VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop)
	Creates a VPWidenIntOrFpInductionRecipe for PhiR.
static ScalarEpilogueLowering	getScalarEpilogueLowering (Function *F, Loop *L, LoopVectorizeHints &Hints, bool OptForSize, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI)
static bool	processLoopInVPlanNativePath (Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, LoopVectorizationLegality *LVL, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, std::function< BlockFrequencyInfo &()> GetBFI, bool OptForSize, LoopVectorizeHints &Hints, LoopVectorizationRequirements &Requirements)
static void	checkMixedPrecision (Loop *L, OptimizationRemarkEmitter *ORE)
static InstructionCost	calculateEarlyExitCost (VPCostContext &CostCtx, VPlan &Plan, ElementCount VF)
	For loops with uncountable early exits, find the cost of doing work when exiting the loop early, such as calculating the final exit values of variables used outside the loop.
static bool	isOutsideLoopWorkProfitable (GeneratedRTChecks &Checks, VectorizationFactor &VF, Loop *L, PredicatedScalarEvolution &PSE, VPCostContext &CostCtx, VPlan &Plan, ScalarEpilogueLowering SEL, std::optional< unsigned > VScale)
	This function determines whether or not it's still profitable to vectorize the loop given the extra work we have to do outside of the loop:
static void	preparePlanForMainVectorLoop (VPlan &MainPlan, VPlan &EpiPlan)
	Prepare MainPlan for vectorizing the main vector loop during epilogue vectorization.
static SmallVector< Instruction * >	preparePlanForEpilogueVectorLoop (VPlan &Plan, Loop *L, const SCEV2ValueTy &ExpandedSCEVs, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel &CM, ScalarEvolution &SE)
	Prepare Plan for vectorizing the epilogue loop.
static Value *	createInductionAdditionalBypassValues (PHINode *OrigPhi, const InductionDescriptor &II, IRBuilder<> &BypassBuilder, const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount, Instruction *OldInduction)
static void	fixScalarResumeValuesFromBypass (BasicBlock *BypassBlock, Loop *L, VPlan &BestEpiPlan, LoopVectorizationLegality &LVL, const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount)
static void	connectEpilogueVectorLoop (VPlan &EpiPlan, Loop *L, EpilogueLoopVectorizationInfo &EPI, DominatorTree *DT, LoopVectorizationLegality &LVL, DenseMap< const SCEV *, Value * > &ExpandedSCEVs, GeneratedRTChecks &Checks, ArrayRef< Instruction * > InstsToMove)
	Connect the epilogue vector loop generated for EpiPlan to the main vector.

Variables
const char	VerboseDebug [] = DEBUG_TYPE "-verbose"
static cl::opt< bool >	EnableEpilogueVectorization ("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops."))
static cl::opt< unsigned >	EpilogueVectorizationForceVF ("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops."))
static cl::opt< unsigned >	EpilogueVectorizationMinVF ("epilogue-vectorization-minimum-VF", cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization."))
static cl::opt< unsigned >	TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))
	Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.
static cl::opt< unsigned >	VectorizeMemoryCheckThreshold ("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks"))
static cl::opt< PreferPredicateTy::Option >	PreferPredicateOverEpilogue ("prefer-predicate-over-epilogue", cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden, cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop."), cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails.")))
static cl::opt< TailFoldingStyle >	ForceTailFoldingStyle ("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values(clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN(TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask.")))
static cl::opt< bool >	MaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
static cl::opt< bool >	EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static cl::opt< bool >	EnableMaskedInterleavedMemAccesses ("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"))
	An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.
static cl::opt< unsigned >	ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static cl::opt< unsigned >	ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static cl::opt< unsigned >	ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))
static cl::opt< unsigned >	ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))
static cl::opt< bool >	ForceTargetSupportsScalableVectors ("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))
static cl::opt< unsigned >	SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
static cl::opt< bool >	LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))
static cl::opt< bool >	EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
static cl::opt< unsigned >	NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
	The number of stores in a loop that are allowed to need predication.
static cl::opt< bool >	EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
static cl::opt< bool >	EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static cl::opt< unsigned >	MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))
static cl::opt< bool >	PreferInLoopReductions ("prefer-inloop-reductions", cl::init(false), cl::Hidden, cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference."))
static cl::opt< bool >	ForceOrderedReductions ("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions"))
static cl::opt< bool >	PreferPredicatedReductionSelect ("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc("Prefer predicating a reduction operation over an after loop select."))
static cl::opt< bool >	VPlanBuildStressTest ("vplan-build-stress-test", cl::init(false), cl::Hidden, cl::desc("Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path)."))
static cl::opt< cl::boolOrDefault >	ForceSafeDivisor ("force-widen-divrem-via-safe-divisor", cl::Hidden, cl::desc("Override cost based safe divisor widening for div/rem instructions"))
static cl::opt< bool >	UseWiderVFIfCallVariantsPresent ("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))
static cl::opt< bool >	EnableEarlyExitVectorization ("enable-early-exit-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of early exit loops with uncountable exits."))
static cl::opt< bool >	ConsiderRegPressure ("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high."))
static constexpr uint32_t	MinItersBypassWeights [] = {1, 127}

◆ DEBUG_TYPE

◆ LV_NAME

#define LV_NAME "loop-vectorize"

◆ addFullyUnrolledInstructionsToIgnore()

◆ calculateEarlyExitCost()

◆ checkMixedPrecision()

Definition at line 9278 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::OptimizationRemarkEmitter::emit(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), I, llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::isa(), LV_NAME, llvm::SmallVectorImpl< T >::pop_back_val(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ collectSupportedLoops()

◆ connectEpilogueVectorLoop()

Connect the epilogue vector loop generated for EpiPlan to the main vector.

Definition at line 9796 of file LoopVectorize.cpp.

References llvm::GenericDomTreeUpdater< DerivedT, DomTreeT, PostDomTreeT >::applyUpdates(), assert(), llvm::cast(), llvm::DominatorTreeBase< BasicBlock, false >::Delete, llvm::EpilogueLoopVectorizationInfo::EpilogueIterationCountCheck, fixScalarResumeValuesFromBypass(), llvm::VPlan::getEntry(), llvm::BasicBlock::getFirstNonPHIIt(), llvm::VPlan::getScalarPreheader(), llvm::BasicBlock::getSinglePredecessor(), llvm::BasicBlock::getTerminator(), I, llvm::DominatorTreeBase< BasicBlock, false >::Insert, llvm::EpilogueLoopVectorizationInfo::MainLoopIterationCountCheck, llvm::make_pointer_range(), llvm::none_of(), llvm::BasicBlock::phis(), llvm::User::replaceUsesOfWith(), and llvm::EpilogueLoopVectorizationInfo::VectorTripCount.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ createInductionAdditionalBypassValues()

◆ createLVAnalysis()

Create an analysis remark that explains why vectorization failed.

PassName is the name of the pass (e.g. can be AlwaysPrint). RemarkName is the identifier for the remark. If I is passed it is an instruction that prevents vectorization. Otherwise TheLoop is used for the location of the remark. If DL is passed, use it as debug location for the remark.

Returns

the remark object that can be streamed to.

Definition at line 756 of file LoopVectorize.cpp.

References DL, I, and PassName.

Referenced by llvm::reportVectorizationFailure().

◆ createWidenInductionRecipes()

Creates a VPWidenIntOrFpInductionRecipe for PhiR.

If needed, it will also insert a recipe to expand the step for the induction recipe.

Definition at line 7694 of file LoopVectorize.cpp.

References assert(), llvm::cast(), llvm::VPRecipeBase::getDebugLoc(), llvm::VPValue::getDefiningRecipe(), llvm::vputils::getFlagsFromIndDesc(), llvm::VPlan::getLiveIn(), llvm::VPUser::getOperand(), llvm::vputils::getOrCreateVPValueForSCEVExpr(), llvm::ScalarEvolution::getSCEV(), llvm::vputils::getSCEVExprForVPValue(), llvm::InductionDescriptor::getStartValue(), llvm::InductionDescriptor::getStep(), llvm::Value::getType(), llvm::VPSingleDefRecipe::getUnderlyingInstr(), llvm::VPlan::getVF(), llvm::ScalarEvolution::isLoopInvariant(), llvm::ScalarEvolution::isSCEVable(), llvm::PatternMatch::m_Add(), llvm::PatternMatch::m_Specific(), llvm::VPlanPatternMatch::m_VPValue(), llvm::PatternMatch::match(), and llvm::VPUser::setOperand().

◆ debugVectorizationMessage()

◆ determineVPlanVF()

◆ emitTransformedIndex()

Compute the transformed value of Index at offset StartValue using step StepValue.

For integer induction, returns StartValue + Index * StepValue. For pointer induction, returns StartValue[Index * StepValue]. FIXME: The newly created binary instructions should contain nsw/nuw flags, which can be found from the original scalar operations.

Definition at line 2149 of file LoopVectorize.cpp.

References assert(), B(), llvm::cast(), CreateAdd(), CreateMul(), llvm::dyn_cast(), llvm::Value::getName(), llvm::BinaryOperator::getOpcode(), llvm::Value::getType(), llvm::InductionDescriptor::IK_FpInduction, llvm::InductionDescriptor::IK_IntInduction, llvm::InductionDescriptor::IK_NoInduction, llvm::InductionDescriptor::IK_PtrInduction, llvm::isa(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), llvm_unreachable, llvm::PatternMatch::m_One(), llvm::MIPatternMatch::m_ZeroInt(), llvm::SCEVPatternMatch::match(), llvm::Offset, llvm::Value::setName(), X, and Y.

Referenced by createInductionAdditionalBypassValues(), and llvm::VPDerivedIVRecipe::execute().

◆ estimateElementCount()

This function attempts to return a value that represents the ElementCount at runtime.

For fixed-width VFs we know this precisely at compile time, but for scalable VFs we calculate it based on an estimate of the vscale value.

Definition at line 2513 of file LoopVectorize.cpp.

References assert(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), and llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable().

Referenced by llvm::LoopVectorizationPlanner::executePlan(), llvm::LoopVectorizationCostModel::isEpilogueVectorizationProfitable(), isOutsideLoopWorkProfitable(), preparePlanForEpilogueVectorLoop(), llvm::LoopVectorizationPlanner::selectEpilogueVectorizationFactor(), and llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ fixReductionScalarResumeWhenVectorizingEpilog()

void fixReductionScalarResumeWhenVectorizingEpilog ( VPPhi * EpiResumePhiR, PHINode & EpiResumePhi, BasicBlock * BypassBlock )	static

Definition at line 7272 of file LoopVectorize.cpp.

References assert(), llvm::VPInstruction::Broadcast, llvm::cast(), llvm::VPInstruction::ComputeAnyOfResult, llvm::VPInstruction::ComputeFindIVResult, llvm::VPInstruction::ComputeReductionResult, llvm::dyn_cast(), llvm::PHINode::getIncomingValueForBlock(), llvm::VPUser::getOperand(), getStartValueFromReductionResult(), llvm::CmpInst::ICMP_EQ, llvm::CmpInst::ICMP_NE, llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind(), llvm::RecurrenceDescriptor::isFindIVRecurrenceKind(), llvm::isGuaranteedNotToBeUndefOrPoison(), llvm::MIPatternMatch::m_OneUse(), llvm::PatternMatch::m_Select(), llvm::PatternMatch::m_Specific(), llvm::PatternMatch::m_SpecificICmp(), llvm::PatternMatch::m_Value(), llvm::VPlanPatternMatch::m_VPValue(), llvm::VPlanPatternMatch::m_ZExtOrSExt(), llvm::PatternMatch::match(), llvm::VPInstruction::ReductionStartVector, and llvm::PHINode::setIncomingValueForBlock().

Referenced by fixScalarResumeValuesFromBypass().

◆ fixScalarResumeValuesFromBypass()

Definition at line 9754 of file LoopVectorize.cpp.

References llvm::cast(), createInductionAdditionalBypassValues(), fixReductionScalarResumeWhenVectorizingEpilog(), llvm::BasicBlock::getFirstInsertionPt(), llvm::LoopVectorizationLegality::getInductionVars(), llvm::LoopVectorizationLegality::getPrimaryInduction(), llvm::VPlan::getScalarPreheader(), II, llvm::BasicBlock::phis(), llvm::predecessors(), and llvm::zip().

Referenced by connectEpilogueVectorLoop().

◆ getAddressAccessSCEV()

◆ getDebugLocFromInstOrOperands()

◆ getExpandedStep()

◆ getMaxVScale()

◆ getScalarEpilogueLowering()

Definition at line 9171 of file LoopVectorize.cpp.

References llvm::CM_ScalarEpilogueAllowed, llvm::CM_ScalarEpilogueNotAllowedOptSize, llvm::CM_ScalarEpilogueNotAllowedUsePredicate, llvm::CM_ScalarEpilogueNotNeededUsePredicate, F, llvm::LoopVectorizeHints::FK_Disabled, llvm::LoopVectorizeHints::FK_Enabled, llvm::LoopVectorizeHints::getForce(), llvm::LoopVectorizeHints::getPredicate(), PreferPredicateTy::PredicateElseScalarEpilogue, PreferPredicateTy::PredicateOrDontVectorize, PreferPredicateOverEpilogue, and PreferPredicateTy::ScalarEpilogue.

Referenced by llvm::LoopVectorizePass::processLoop(), and processLoopInVPlanNativePath().

◆ getSmallBestKnownTC()

Returns "best known" trip count, which is either a valid positive trip count or std::nullopt when an estimate cannot be made (including when the trip count would overflow), for the specified loop L as defined by the following procedure: 1) Returns exact trip count if it is known.

2) Returns expected trip count according to profile data if any. 3) Returns upper bound estimate if known, and if CanUseConstantMax. 4) Returns std::nullopt if all of the above failed.

Definition at line 453 of file LoopVectorize.cpp.

References llvm::ElementCount::getFixed(), llvm::getLoopEstimatedTripCount(), llvm::PredicatedScalarEvolution::getSE(), llvm::PredicatedScalarEvolution::getSmallConstantMaxTripCount(), getSmallConstantTripCount(), and LoopVectorizeWithBlockFrequency.

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), isOutsideLoopWorkProfitable(), llvm::LoopVectorizePass::processLoop(), and llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ getSmallConstantTripCount()

A version of ScalarEvolution::getSmallConstantTripCount that returns an ElementCount to include loops whose trip count is a function of vscale.

Definition at line 422 of file LoopVectorize.cpp.

References llvm::cast(), llvm::APInt::getActiveBits(), llvm::ScalarEvolution::getBackedgeTakenCount(), llvm::ElementCount::getFixed(), llvm::ElementCount::getScalable(), llvm::ScalarEvolution::getSmallConstantTripCount(), llvm::ScalarEvolution::getTripCountFromExitCount(), llvm::SCEV::getType(), llvm::APInt::getZExtValue(), hasNoUnsignedWrap(), llvm::isa(), llvm::SCEVPatternMatch::m_scev_APInt(), llvm::SCEVPatternMatch::m_scev_Mul(), llvm::SCEVPatternMatch::m_SCEVVScale(), and llvm::SCEVPatternMatch::match().

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), llvm::LoopVectorizationCostModel::expectedCost(), getSmallBestKnownTC(), and llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ getStartValueFromReductionResult()

◆ hasIrregularType()

◆ hasReplicatorRegion()

bool hasReplicatorRegion ( VPlan & Plan)	static

◆ isExplicitVecOuterLoop()

Definition at line 2084 of file LoopVectorize.cpp.

References llvm::LoopVectorizeHints::allowVectorization(), assert(), llvm::dbgs(), llvm::LoopVectorizeHints::emitRemarkWithHints(), llvm::LoopVectorizeHints::FK_Undefined, llvm::LoopVectorizeHints::getForce(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::LoopVectorizeHints::getInterleave(), llvm::BasicBlock::getParent(), llvm::LoopBase< BlockT, LoopT >::isInnermost(), and LLVM_DEBUG.

Referenced by collectSupportedLoops().

◆ isIndvarOverflowCheckKnownFalse()

◆ isOutsideLoopWorkProfitable()

This function determines whether or not it's still profitable to vectorize the loop given the extra work we have to do outside of the loop:

1. Perform the runtime checks before entering the loop to ensure it's safe to vectorize.
2. In the case of loops with uncountable early exits, we may have to do extra work when exiting the loop early, such as calculating the final exit values of variables used outside the loop.
3. The middle block, if expected TC <= VF.Width.

Definition at line 9351 of file LoopVectorize.cpp.

References llvm::alignTo(), calculateEarlyExitCost(), llvm::CM_ScalarEpilogueAllowed, llvm::VectorizationFactor::Cost, llvm::VPBasicBlock::cost(), llvm::dbgs(), llvm::divideCeil(), estimateElementCount(), llvm::ElementCount::getFixed(), llvm::VPlan::getMiddleBlock(), getSmallBestKnownTC(), llvm::InstructionCost::getValue(), llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownLT(), llvm::ElementCount::isScalar(), llvm::InstructionCost::isValid(), LLVM_DEBUG, llvm::VectorizationFactor::MinProfitableTripCount, llvm::VectorizationFactor::ScalarCost, VectorizeMemoryCheckThreshold, and llvm::VectorizationFactor::Width.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ legacyCSE()

◆ maybeVectorizeType()

◆ planContainsAdditionalSimplifications()

Return true if the original loop TheLoop contains any instructions that do not have corresponding recipes in Plan and are not marked to be ignored in CostCtx.

This means the VPlan contains simplification that the legacy cost-model did not account for.

If a VPlan transform folded a recipe to one producing a single-scalar, but the original instruction wasn't uniform-after-vectorization in the legacy cost model, the legacy cost overestimates the actual cost.

Definition at line 7012 of file LoopVectorize.cpp.

References llvm::any_of(), llvm::LoopBase< BlockT, LoopT >::blocks(), llvm::VPBlockUtils::blocksOnly(), llvm::cast(), llvm::dyn_cast(), llvm::dyn_cast_or_null(), llvm::VPInstruction::FirstOrderRecurrenceSplice, llvm::VPRegionBlock::getEntry(), llvm::getLoadStorePointerOperand(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getVectorPreheader(), I, llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::insert(), IR, llvm::isa(), llvm::VPCostContext::isLegacyUniformAfterVectorization(), llvm::ElementCount::isScalar(), llvm::vputils::isSingleScalar(), llvm::PatternMatch::m_Cmp(), llvm::SCEVPatternMatch::match(), llvm::PatternMatch::match_fn(), llvm::none_of(), and llvm::vp_depth_first_deep().

◆ preparePlanForEpilogueVectorLoop()

Prepare Plan for vectorizing the epilogue loop.

That is, re-use expanded SCEVs from ExpandedSCEVs and set resume values for header recipes. Some reductions require creating new instructions to compute the resume values. They are collected in a vector and returned. They must be moved to the preheader of the vector epilogue loop, after being created by the execution of Plan.

Definition at line 9559 of file LoopVectorize.cpp.

References llvm::Add, llvm::VPlanTransforms::addMinimumVectorEpilogueIterationCheck(), llvm::all_of(), assert(), llvm::cast(), llvm::drop_begin(), llvm::dyn_cast(), llvm::EpilogueLoopVectorizationInfo::EpilogueUF, llvm::EpilogueLoopVectorizationInfo::EpilogueVF, estimateElementCount(), llvm::find_if(), llvm::VPRegionBlock::getCanonicalIV(), llvm::VPlan::getEntry(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::BasicBlock::getFirstNonPHIIt(), llvm::PHINode::getIncomingValueForBlock(), llvm::VPValue::getLiveInIRValue(), llvm::PHINode::getNumIncomingValues(), llvm::VPInstruction::getOpcode(), llvm::User::getOperand(), llvm::VPUser::getOperand(), llvm::VPlan::getOrAddLiveIn(), llvm::BasicBlock::getParent(), llvm::VPHeaderPHIRecipe::getStartValue(), getStartValueFromReductionResult(), llvm::VPlan::getTripCount(), llvm::VPlan::getVectorLoopRegion(), llvm::LoopVectorizationCostModel::getVScaleForTuning(), I, llvm::PHINode::incoming_values(), llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind(), llvm::RecurrenceDescriptor::isFindIVRecurrenceKind(), llvm::ElementCount::isVector(), IV, llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::lookup(), llvm::PatternMatch::m_SpecificInt(), llvm::EpilogueLoopVectorizationInfo::MainLoopIterationCountCheck, llvm::EpilogueLoopVectorizationInfo::MainLoopUF, llvm::EpilogueLoopVectorizationInfo::MainLoopVF, llvm::make_early_inc_range(), llvm::PatternMatch::match(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::VPInstruction::ReductionStartVector, llvm::VPValue::replaceAllUsesWith(), llvm::LoopVectorizationCostModel::requiresScalarEpilogue(), llvm::VPlan::resetTripCount(), llvm::Sentinel, llvm::VPBlockBase::setName(), llvm::VPUser::setOperand(), llvm::EpilogueLoopVectorizationInfo::TripCount, and llvm::EpilogueLoopVectorizationInfo::VectorTripCount.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ preparePlanForMainVectorLoop()

void preparePlanForMainVectorLoop ( VPlan & MainPlan, VPlan & EpiPlan )	static

Prepare MainPlan for vectorizing the main vector loop during epilogue vectorization.

Remove ResumePhis from MainPlan for inductions that don't have a corresponding wide induction in EpiPlan.

Definition at line 9468 of file LoopVectorize.cpp.

References llvm::VPBasicBlock::begin(), llvm::cast(), llvm::VPInstruction::ComputeFindIVResult, llvm::SmallPtrSetImpl< PtrType >::contains(), llvm::VPBuilder::createNaryOp(), llvm::VPBuilder::createScalarPhi(), llvm::dyn_cast(), llvm::iterator_range< IteratorT >::end(), llvm::VPBasicBlock::end(), llvm::VPRecipeBase::eraseFromParent(), llvm::find_if(), llvm::VPRegionBlock::getCanonicalIV(), llvm::VPValue::getDefiningRecipe(), llvm::VPlan::getEntry(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPValue::getLiveInIRValue(), llvm::VPlan::getMiddleBlock(), llvm::VPInstruction::getOpcode(), llvm::VPUser::getOperand(), llvm::VPlan::getScalarHeader(), llvm::VPlan::getScalarPreheader(), llvm::VPHeaderPHIRecipe::getStartValue(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getVectorTripCount(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::isa(), llvm::isGuaranteedNotToBeUndefOrPoison(), llvm::make_early_inc_range(), llvm::VPRecipeBase::moveBefore(), llvm::VPBasicBlock::phis(), llvm::VPlanTransforms::removeDeadRecipes(), llvm::VPValue::replaceUsesWithIf(), llvm::VPInstruction::ResumeForEpilogue, llvm::VPlanTransforms::runPass(), and llvm::VPUser::setOperand().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ processLoopInVPlanNativePath()

bool processLoopInVPlanNativePath ( Loop * L, PredicatedScalarEvolution & PSE, LoopInfo * LI, DominatorTree * DT, LoopVectorizationLegality * LVL, TargetTransformInfo * TTI, TargetLibraryInfo * TLI, DemandedBits * DB, AssumptionCache * AC, OptimizationRemarkEmitter * ORE, std::function< BlockFrequencyInfo &()> GetBFI, bool OptForSize, LoopVectorizeHints & Hints, LoopVectorizationRequirements & Requirements )

static

Definition at line 9213 of file LoopVectorize.cpp.

References llvm::LoopVectorizationPlanner::addMinimumIterationCheck(), assert(), llvm::LoopVectorizationCostModel::collectElementTypesForWidening(), llvm::LoopVectorizationCostModel::CostKind, llvm::dbgs(), llvm::VectorizationFactor::Disabled(), llvm::EnableVPlanNativePath, llvm::LoopVectorizationPlanner::executePlan(), F, llvm::PredicatedScalarEvolution::getBackedgeTakenCount(), llvm::LoopVectorizationLegality::getLAI(), llvm::LoopVectorizationPlanner::getPlanFor(), getScalarEpilogueLowering(), llvm::LoopVectorizeHints::getWidth(), llvm::isa(), LLVM_DEBUG, llvm::VectorizationFactor::MinProfitableTripCount, llvm::LoopVectorizationPlanner::planInVPlanNativePath(), llvm::reportVectorization(), llvm::verifyFunction(), VPlanBuildStressTest, and llvm::VectorizationFactor::Width.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ replaceVPBBWithIRVPBB()

Replace VPBB with a VPIRBasicBlock wrapping IRBB.

All recipes from VPBB are moved to the end of the newly created VPIRBasicBlock. All predecessors and successors of VPBB, if any, are rewired to the new VPIRBasicBlock. If VPBB may be unreachable, Plan must be passed.

Definition at line 2379 of file LoopVectorize.cpp.

References llvm::VPBasicBlock::begin(), llvm::VPBasicBlock::end(), llvm::VPBasicBlock::getFirstNonPhi(), llvm::VPBlockBase::getPlan(), llvm::make_early_inc_range(), llvm::make_range(), llvm::VPBasicBlock::phis(), and llvm::VPBlockUtils::reassociateBlocks().

Referenced by llvm::EpilogueVectorizerMainLoop::emitIterationCountCheck(), and llvm::LoopVectorizationPlanner::executePlan().

◆ STATISTIC() [1/4]

STATISTIC	(	LoopsAnalyzed	,
"Number of loops analyzed for vectorization"	)

◆ STATISTIC() [2/4]

STATISTIC	(	LoopsEarlyExitVectorized	,
"Number of early exit loops vectorized"	)

◆ STATISTIC() [3/4]

STATISTIC	(	LoopsEpilogueVectorized	,
"Number of epilogues vectorized"	)

◆ STATISTIC() [4/4]

STATISTIC	(	LoopsVectorized	,
"Number of loops vectorized"	)

◆ useActiveLaneMask()

◆ useActiveLaneMaskForControlFlow()

◆ useMaskedInterleavedAccesses()

◆ willGenerateVectors()

Check if any recipe of Plan will generate a vector value, which will be assigned a vector register.

Definition at line 4079 of file LoopVectorize.cpp.

References llvm::any_of(), assert(), llvm::VPBlockUtils::blocksOnly(), llvm::collectEphemeralRecipesForVPlan(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::contains(), llvm::getContainedTypes(), llvm::VPRegionBlock::getEntry(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getFixedValue(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), llvm::VPlan::getVectorLoopRegion(), llvm::VPTypeAnalysis::inferScalarType(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::insert(), llvm::isa(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable(), llvm::ElementCount::isVector(), llvm_unreachable, llvm::toVectorizedTy(), and llvm::vp_depth_first_shallow().

Referenced by llvm::LoopVectorizationPlanner::computeBestVF().

◆ ConsiderRegPressure

cl::opt< bool > ConsiderRegPressure("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high.")) ( "vectorizer-consider-reg-pressure" , cl::init(false) , cl::Hidden , cl::desc("Discard VFs if their register pressure is too high.") )	static

◆ EnableCondStoresVectorization

cl::opt< bool > EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) ( "enable-cond-stores-vec" , cl::init(true) , cl::Hidden , cl::desc("Enable if predication of stores during vectorization.") )	static

◆ EnableEarlyExitVectorization

cl::opt< bool > EnableEarlyExitVectorization("enable-early-exit-vectorization", cl::init(true), cl::Hidden, cl::desc( "Enable vectorization of early exit loops with uncountable exits.")) ( "enable-early-exit-vectorization" , cl::init(true) , cl::Hidden , cl::desc( "Enable vectorization of early exit loops with uncountable exits.") )	static

◆ EnableEpilogueVectorization

cl::opt< bool > EnableEpilogueVectorization("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops.")) ( "enable-epilogue-vectorization" , cl::init(true) , cl::Hidden , cl::desc("Enable vectorization of epilogue loops.") )	static

◆ EnableIndVarRegisterHeur

cl::opt< bool > EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")) ( "enable-ind-var-reg-heur" , cl::init(true) , cl::Hidden , cl::desc("Count the induction variable only once when interleaving") )	static

◆ EnableInterleavedMemAccesses

cl::opt< bool > EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) ( "enable-interleaved-mem-accesses" , cl::init(false) , cl::Hidden , cl::desc("Enable vectorization on interleaved memory accesses in a loop") )	static

◆ EnableLoadStoreRuntimeInterleave

cl::opt< bool > EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated")) ( "enable-loadstore-runtime-interleave" , cl::init(true) , cl::Hidden , cl::desc( "Enable runtime interleaving until load/store ports are saturated") )	static

◆ EnableMaskedInterleavedMemAccesses

cl::opt< bool > EnableMaskedInterleavedMemAccesses("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop")) ( "enable-masked-interleaved-mem-accesses" , cl::init(false) , cl::Hidden , cl::desc("Enable vectorization on masked interleaved memory accesses in a loop") )	static

An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.

Referenced by useMaskedInterleavedAccesses().

◆ EpilogueVectorizationForceVF

cl::opt< unsigned > EpilogueVectorizationForceVF("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.")) ( "epilogue-vectorization-force-VF" , cl::init(1) , cl::Hidden , cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.") )

static

◆ EpilogueVectorizationMinVF

cl::opt< unsigned > EpilogueVectorizationMinVF("epilogue-vectorization-minimum-VF", cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.")) ( "epilogue-vectorization-minimum-VF" , cl::Hidden , cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.") )

static

◆ ForceOrderedReductions

cl::opt< bool > ForceOrderedReductions("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")) ( "force-ordered-reductions" , cl::init(false) , cl::Hidden , cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions") )	static

◆ ForceSafeDivisor

cl::opt< cl::boolOrDefault > ForceSafeDivisor("force-widen-divrem-via-safe-divisor", cl::Hidden, cl::desc( "Override cost based safe divisor widening for div/rem instructions")) ( "force-widen-divrem-via-safe-divisor" , cl::Hidden , cl::desc( "Override cost based safe divisor widening for div/rem instructions") )	static

◆ ForceTailFoldingStyle

cl::opt< TailFoldingStyle > ForceTailFoldingStyle("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask."))) ( "force-tail-folding-style" , cl::desc("Force the tail folding style") , cl::init(TailFoldingStyle::None) , cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask.")) )

static

◆ ForceTargetMaxScalarInterleaveFactor

cl::opt< unsigned > ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")) ( "force-target-max-scalar-interleave" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.") )	static

◆ ForceTargetMaxVectorInterleaveFactor

cl::opt< unsigned > ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")) ( "force-target-max-vector-interleave" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.") )	static

◆ ForceTargetNumScalarRegs

cl::opt< unsigned > ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) ( "force-target-num-scalar-regs" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's number of scalar registers.") )	static

◆ ForceTargetNumVectorRegs

cl::opt< unsigned > ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) ( "force-target-num-vector-regs" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's number of vector registers.") )	static

◆ ForceTargetSupportsScalableVectors

cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc( "Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing.")) ( "force-target-supports-scalable-vectors" , cl::init(false) , cl::Hidden , cl::desc( "Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing.") )

static

◆ LoopVectorizeWithBlockFrequency

cl::opt< bool > LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")) ( "loop-vectorize-with-block-frequency" , cl::init(true) , cl::Hidden , cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.") )

static

◆ MaximizeBandwidth

cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")) ( "vectorizer-maximize-bandwidth" , cl::init(false) , cl::Hidden , cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.") )

static

◆ MaxNestedScalarReductionIC

cl::opt< unsigned > MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")) ( "max-nested-scalar-reduction-interleave" , cl::init(2) , cl::Hidden , cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.") )

static

◆ MinItersBypassWeights

uint32_t MinItersBypassWeights[] = {1, 127}	static constexpr

◆ NumberOfStoresToPredicate

The number of stores in a loop that are allowed to need predication.

◆ PreferInLoopReductions

cl::opt< bool > PreferInLoopReductions("prefer-inloop-reductions", cl::init(false), cl::Hidden, cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.")) ( "prefer-inloop-reductions" , cl::init(false) , cl::Hidden , cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.") )	static

◆ PreferPredicatedReductionSelect

cl::opt< bool > PreferPredicatedReductionSelect("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc( "Prefer predicating a reduction operation over an after loop select.")) ( "prefer-predicated-reduction-select" , cl::init(false) , cl::Hidden , cl::desc( "Prefer predicating a reduction operation over an after loop select.") )	static

◆ PreferPredicateOverEpilogue

cl::opt< PreferPredicateTy::Option > PreferPredicateOverEpilogue("prefer-predicate-over-epilogue", cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden, cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop."), cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails."))) ( "prefer-predicate-over-epilogue" , cl::init(PreferPredicateTy::ScalarEpilogue) , cl::Hidden , cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop.") , cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails.")) )

static

◆ SmallLoopCost

cl::opt< unsigned > SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver.")) ( "small-loop-cost" , cl::init(20) , cl::Hidden , cl::desc( "The cost of a loop that is considered 'small' by the interleaver.") )	static

◆ TinyTripCountVectorThreshold

cl::opt< unsigned > TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")) ( "vectorizer-min-trip-count" , cl::init(16) , cl::Hidden , cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.") )

static

◆ UseWiderVFIfCallVariantsPresent

cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants")) ( "vectorizer-maximize-bandwidth-for-vector-calls" , cl::init(true) , cl::Hidden , cl::desc("Try wider VFs if they enable the use of vector variants") )	static

◆ VectorizeMemoryCheckThreshold

cl::opt< unsigned > VectorizeMemoryCheckThreshold("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks")) ( "vectorize-memory-check-threshold" , cl::init(128) , cl::Hidden , cl::desc("The maximum allowed number of runtime memory checks") )	static

◆ VerboseDebug

◆ VPlanBuildStressTest

cl::opt< bool > VPlanBuildStressTest("vplan-build-stress-test", cl::init(false), cl::Hidden, cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).")) ( "vplan-build-stress-test" , cl::init(false) , cl::Hidden , cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).") )

static