LLVM: lib/Transforms/Vectorize/LoopVectorize.cpp File Reference (original) (raw)
Go to the source code of this file.
Classes | |
---|---|
class | llvm::InnerLoopVectorizer |
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization factor (VF). More... | |
struct | llvm::EpilogueLoopVectorizationInfo |
Encapsulate information regarding vectorization of a loop and its epilogue. More... | |
class | llvm::InnerLoopAndEpilogueVectorizer |
An extension of the inner loop vectorizer that creates a skeleton for a vectorized loop that has its epilogue (residual) also vectorized. More... | |
class | llvm::EpilogueVectorizerMainLoop |
A specialized derived class of inner loop vectorizer that performs vectorization of main loops in the process of vectorizing loops and their epilogues. More... | |
class | llvm::EpilogueVectorizerEpilogueLoop |
class | llvm::LoopVectorizationCostModel |
LoopVectorizationCostModel - estimates the expected speedups due to vectorization. More... | |
struct | llvm::LoopVectorizationCostModel::RegisterUsage |
A struct that represents some properties of the register usage of a loop. More... | |
struct | llvm::LoopVectorizationCostModel::CallWideningDecision |
Namespaces | |
---|---|
namespace | PreferPredicateTy |
namespace | llvm |
This is an optimization pass for GlobalISel generic memory operations. | |
Functions | |
---|---|
STATISTIC (LoopsVectorized, "Number of loops vectorized") | |
STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization") | |
STATISTIC (LoopsEpilogueVectorized, "Number of epilogues vectorized") | |
static bool | hasIrregularType (Type *Ty, const DataLayout &DL) |
A helper function that returns true if the given type is irregular. | |
static std::optional< unsigned > | getSmallBestKnownTC (PredicatedScalarEvolution &PSE, Loop *L, bool CanUseConstantMax=true) |
Returns "best known" trip count for the specified loop L as defined by the following procedure: 1) Returns exact trip count if it is known. | |
static DebugLoc | getDebugLocFromInstOrOperands (Instruction *I) |
Look for a meaningful debug location on the instruction or its operands. | |
static void | debugVectorizationMessage (const StringRef Prefix, const StringRef DebugMsg, Instruction *I) |
Write a DebugMsg about vectorization to the debug output stream. | |
static OptimizationRemarkAnalysis | createLVAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I, DebugLoc DL={}) |
Create an analysis remark that explains why vectorization failed. | |
Value * | llvm::createStepForVF (IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step) |
Return a value for Step multiplied by VF. | |
Value * | llvm::getRuntimeVF (IRBuilderBase &B, Type *Ty, ElementCount VF) |
Return the runtime value for VF. | |
void | llvm::reportVectorizationFailure (const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr) |
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding optimization remark RemarkName. | |
static void | llvm::reportVectorizationInfo (const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={}) |
Reports an informative message: print Msg for debugging purposes as well as an optimization remark. | |
static void | llvm::reportVectorization (OptimizationRemarkEmitter *ORE, Loop *TheLoop, VectorizationFactor VF, unsigned IC) |
Report successful vectorization of the loop. | |
static bool | useActiveLaneMask (TailFoldingStyle Style) |
static bool | useActiveLaneMaskForControlFlow (TailFoldingStyle Style) |
static bool | isExplicitVecOuterLoop (Loop *OuterLp, OptimizationRemarkEmitter *ORE) |
static void | collectSupportedLoops (Loop &L, LoopInfo *LI, OptimizationRemarkEmitter *ORE, SmallVectorImpl< Loop * > &V) |
static Value * | emitTransformedIndex (IRBuilderBase &B, Value *Index, Value *StartValue, Value *Step, InductionDescriptor::InductionKind InductionKind, const BinaryOperator *InductionBinOp) |
Compute the transformed value of Index at offset StartValue using step StepValue. | |
std::optional< unsigned > | getMaxVScale (const Function &F, const TargetTransformInfo &TTI) |
static bool | isIndvarOverflowCheckKnownFalse (const LoopVectorizationCostModel *Cost, ElementCount VF, std::optional< unsigned > UF=std::nullopt) |
For the given VF and UF and maximum trip count computed for the loop, return whether the induction variable might overflow in the vectorized loop. | |
static bool | useMaskedInterleavedAccesses (const TargetTransformInfo &TTI) |
static void | replaceVPBBWithIRVPBB (VPBasicBlock *VPBB, BasicBlock *IRBB) |
Replace VPBB with a VPIRBasicBlock wrapping IRBB. | |
static Value * | getExpandedStep (const InductionDescriptor &ID, const SCEV2ValueTy &ExpandedSCEVs) |
Return the expanded step for ID using ExpandedSCEVs to look up SCEV expansion results. | |
static void | addFullyUnrolledInstructionsToIgnore (Loop *L, const LoopVectorizationLegality::InductionList &IL, SmallPtrSetImpl< Instruction * > &InstsToIgnore) |
Knowing that loop L executes a single vector iteration, add instructions that will get simplified and thus should not have any cost to InstsToIgnore. | |
static void | cse (BasicBlock *BB) |
Perform cse of induction variable instructions. | |
static Type * | maybeVectorizeType (Type *Elt, ElementCount VF) |
static std::optional< unsigned > | getVScaleForTuning (const Loop *L, const TargetTransformInfo &TTI) |
Convenience function that returns the value of vscale_range iff vscale_range.min == vscale_range.max or otherwise returns the value returned by the corresponding TTI method. | |
static unsigned | getEstimatedRuntimeVF (const Loop *L, const TargetTransformInfo &TTI, ElementCount VF) |
This function attempts to return a value that represents the vectorization factor at runtime. | |
static bool | willGenerateVectors (VPlan &Plan, ElementCount VF, const TargetTransformInfo &TTI) |
Check if any recipe of Plan will generate a vector value, which will be assigned a vector register. | |
static const SCEV * | getAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop) |
Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence. | |
static ElementCount | determineVPlanVF (const TargetTransformInfo &TTI, LoopVectorizationCostModel &CM) |
static bool | planContainsAdditionalSimplifications (VPlan &Plan, VPCostContext &CostCtx, Loop *TheLoop) |
Return true if the original loop TheLoop contains any instructions that do not have corresponding recipes in Plan and are not marked to be ignored in CostCtx. | |
static void | addRuntimeUnrollDisableMetaData (Loop *L) |
static void | fixReductionScalarResumeWhenVectorizingEpilog (VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock, BasicBlock *BypassBlock) |
static VPWidenIntOrFpInductionRecipe * | createWidenInductionRecipes (PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop) |
Creates a VPWidenIntOrFpInductionRecipe for Phi. | |
static void | addCanonicalIVRecipes (VPlan &Plan, Type *IdxTy, bool HasNUW, DebugLoc DL) |
static VPInstruction * | addResumePhiRecipeForInduction (VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder, VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) |
Create and return a ResumePhi for WideIV, unless it is truncated. | |
static void | addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan, DenseMap< VPValue *, VPValue * > &IVEndValues) |
Create resume phis in the scalar preheader for first-order recurrences, reductions and inductions, and update the VPIRInstructions wrapping the original phis in the scalar header. | |
static SetVector< VPIRInstruction * > | collectUsersInExitBlocks (Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan) |
static bool | addUsersInExitBlocks (VPlan &Plan, const SetVector< VPIRInstruction * > &ExitUsersToFix) |
static void | addExitUsersForFirstOrderRecurrences (VPlan &Plan, SetVector< VPIRInstruction * > &ExitUsersToFix) |
Handle users in the exit block for first-order recurrences in the original exit block. | |
static ScalarEpilogueLowering | getScalarEpilogueLowering (Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) |
static bool | processLoopInVPlanNativePath (Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, LoopVectorizationLegality *LVL, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints, LoopVectorizationRequirements &Requirements) |
static void | checkMixedPrecision (Loop *L, OptimizationRemarkEmitter *ORE) |
static bool | areRuntimeChecksProfitable (GeneratedRTChecks &Checks, VectorizationFactor &VF, Loop *L, const TargetTransformInfo &TTI, PredicatedScalarEvolution &PSE, ScalarEpilogueLowering SEL) |
static void | preparePlanForMainVectorLoop (VPlan &MainPlan, VPlan &EpiPlan) |
Prepare MainPlan for vectorizing the main vector loop during epilogue vectorization. | |
static void | preparePlanForEpilogueVectorLoop (VPlan &Plan, Loop *L, const SCEV2ValueTy &ExpandedSCEVs, const EpilogueLoopVectorizationInfo &EPI) |
Prepare Plan for vectorizing the epilogue loop. | |
Variables | |
---|---|
const char | VerboseDebug [] = DEBUG_TYPE "-verbose" |
static cl::opt< bool > | EnableEpilogueVectorization ("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops.")) |
static cl::opt< unsigned > | EpilogueVectorizationForceVF ("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.")) |
static cl::opt< unsigned > | EpilogueVectorizationMinVF ("epilogue-vectorization-minimum-VF", cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.")) |
static cl::opt< unsigned > | TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")) |
Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred. | |
static cl::opt< unsigned > | VectorizeMemoryCheckThreshold ("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks")) |
static cl::opt< PreferPredicateTy::Option > | PreferPredicateOverEpilogue ("prefer-predicate-over-epilogue", cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden, cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop."), cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails."))) |
static cl::opt< TailFoldingStyle > | ForceTailFoldingStyle ("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values(clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN(TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask."))) |
static cl::opt< bool > | MaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")) |
static cl::opt< bool > | EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) |
static cl::opt< bool > | EnableMaskedInterleavedMemAccesses ("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop")) |
An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps. | |
static cl::opt< unsigned > | ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) |
static cl::opt< unsigned > | ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) |
static cl::opt< unsigned > | ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")) |
static cl::opt< unsigned > | ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")) |
cl::opt< unsigned > | ForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing.")) |
static cl::opt< bool > | ForceTargetSupportsScalableVectors ("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing.")) |
static cl::opt< unsigned > | SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver.")) |
static cl::opt< bool > | LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")) |
static cl::opt< bool > | EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated")) |
static cl::opt< unsigned > | NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) |
The number of stores in a loop that are allowed to need predication. | |
static cl::opt< bool > | EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")) |
static cl::opt< bool > | EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) |
static cl::opt< unsigned > | MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")) |
static cl::opt< bool > | PreferInLoopReductions ("prefer-inloop-reductions", cl::init(false), cl::Hidden, cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.")) |
static cl::opt< bool > | ForceOrderedReductions ("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")) |
static cl::opt< bool > | PreferPredicatedReductionSelect ("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc("Prefer predicating a reduction operation over an after loop select.")) |
cl::opt< bool > | llvm::EnableVPlanNativePath ("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization.")) |
static cl::opt< bool > | VPlanBuildStressTest ("vplan-build-stress-test", cl::init(false), cl::Hidden, cl::desc("Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).")) |
static cl::opt< cl::boolOrDefault > | ForceSafeDivisor ("force-widen-divrem-via-safe-divisor", cl::Hidden, cl::desc("Override cost based safe divisor widening for div/rem instructions")) |
static cl::opt< bool > | UseWiderVFIfCallVariantsPresent ("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants")) |
static cl::opt< bool > | EnableEarlyExitVectorization ("enable-early-exit-vectorization", cl::init(false), cl::Hidden, cl::desc("Enable vectorization of early exit loops with uncountable exits.")) |
static constexpr uint32_t | SCEVCheckBypassWeights [] = {1, 127} |
static constexpr uint32_t | MemCheckBypassWeights [] = {1, 127} |
static constexpr uint32_t | MinItersBypassWeights [] = {1, 127} |
const char | LLVMLoopVectorizeFollowupAll [] = "llvm.loop.vectorize.followup_all" |
const char | LLVMLoopVectorizeFollowupVectorized [] |
const char | LLVMLoopVectorizeFollowupEpilogue [] |
◆ DEBUG_TYPE
◆ LV_NAME
#define LV_NAME "loop-vectorize"
◆ addCanonicalIVRecipes()
Definition at line 8927 of file LoopVectorize.cpp.
References llvm::VPUser::addOperand(), llvm::VPInstruction::BranchOnCount, llvm::VPBuilder::createNaryOp(), llvm::VPBuilder::createOverflowingOp(), DL, llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPBlockBase::getExitingBasicBlock(), llvm::VPlan::getOrAddLiveIn(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getVectorTripCount(), and llvm::VPlan::getVFxUF().
◆ addExitUsersForFirstOrderRecurrences()
Handle users in the exit block for first-order recurrences in the original exit block.
The penultimate value of recurrences is fed to their LCSSA phi users in the original exit block using the VPIRInstruction wrapping the LCSSA phi.
Definition at line 9124 of file LoopVectorize.cpp.
References assert(), llvm::VPBuilder::createNaryOp(), llvm::VPInstruction::ExtractFromEnd, llvm::VPlan::getCanonicalIV(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPlan::getMiddleBlock(), llvm::VPlan::getOrAddLiveIn(), llvm::VPlan::getScalarPreheader(), llvm::VPCanonicalIVPHIRecipe::getScalarType(), llvm::VPBlockBase::getSingleSuccessor(), llvm::VPlan::getVectorLoopRegion(), llvm::VPBasicBlock::phis(), and llvm::VPUser::setOperand().
◆ addFullyUnrolledInstructionsToIgnore()
◆ addResumePhiRecipeForInduction()
Create and return a ResumePhi for WideIV, unless it is truncated.
If the induction recipe is not canonical, creates a VPDerivedIVRecipe to compute the end value of the induction.
Definition at line 8953 of file LoopVectorize.cpp.
References llvm::VPBuilder::createDerivedIV(), llvm::VPBuilder::createNaryOp(), llvm::VPBuilder::createScalarCast(), llvm::VPRecipeBase::getDebugLoc(), llvm::VPWidenInductionRecipe::getInductionDescriptor(), llvm::VPHeaderPHIRecipe::getStartValue(), llvm::VPWidenInductionRecipe::getStepValue(), llvm::VPTypeAnalysis::inferScalarType(), and llvm::VPInstruction::ResumePhi.
Referenced by addScalarResumePhis().
◆ addRuntimeUnrollDisableMetaData()
static void addRuntimeUnrollDisableMetaData ( Loop * L) | static |
---|
◆ addScalarResumePhis()
Create resume phis in the scalar preheader for first-order recurrences, reductions and inductions, and update the VPIRInstructions wrapping the original phis in the scalar header.
End values for inductions are added to IVEndValues.
Definition at line 8991 of file LoopVectorize.cpp.
References llvm::VPUser::addOperand(), addResumePhiRecipeForInduction(), assert(), llvm::VPBuilder::createNaryOp(), llvm::VPInstruction::ExtractFromEnd, llvm::VPlan::getCanonicalIV(), llvm::VPlan::getMiddleBlock(), llvm::VPlan::getOrAddLiveIn(), llvm::VPRecipeBuilder::getRecipe(), llvm::VPlan::getScalarHeader(), llvm::VPlan::getScalarPreheader(), llvm::VPCanonicalIVPHIRecipe::getScalarType(), llvm::VPBlockBase::getSinglePredecessor(), llvm::VPBlockBase::getSingleSuccessor(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getVectorTripCount(), Name, and llvm::VPInstruction::ResumePhi.
◆ addUsersInExitBlocks()
Definition at line 9087 of file LoopVectorize.cpp.
References B, llvm::SetVector< T, Vector, Set, N >::empty(), llvm::enumerate(), llvm::VPInstruction::ExtractFromEnd, llvm::IntegerType::get(), llvm::VPlan::getCanonicalIV(), llvm::VPlan::getMiddleBlock(), llvm::VPlan::getOrAddLiveIn(), llvm::VPCanonicalIVPHIRecipe::getScalarType(), and Idx.
◆ areRuntimeChecksProfitable()
Definition at line 10071 of file LoopVectorize.cpp.
References llvm::alignTo(), llvm::CM_ScalarEpilogueAllowed, llvm::VectorizationFactor::Cost, llvm::dbgs(), llvm::divideCeil(), getEstimatedRuntimeVF(), llvm::ElementCount::getFixed(), getSmallBestKnownTC(), llvm::InstructionCost::getValue(), llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownLT(), llvm::ElementCount::isScalar(), llvm::InstructionCost::isValid(), LLVM_DEBUG, llvm::VectorizationFactor::MinProfitableTripCount, llvm::VectorizationFactor::ScalarCost, VectorizeMemoryCheckThreshold, and llvm::VectorizationFactor::Width.
Referenced by llvm::LoopVectorizePass::processLoop().
◆ checkMixedPrecision()
◆ collectSupportedLoops()
◆ collectUsersInExitBlocks()
Definition at line 9052 of file LoopVectorize.cpp.
References assert(), llvm::VPlan::getExitBlocks(), llvm::LoopBase< BlockT, LoopT >::getExitingBlocks(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), llvm::VPlan::getMiddleBlock(), llvm::VPBlockBase::getPredecessors(), llvm::VPRecipeBuilder::getVPValueOrAddLiveIn(), llvm::SetVector< T, Vector, Set, N >::insert(), and llvm::SmallVectorBase< Size_T >::size().
◆ createLVAnalysis()
Create an analysis remark that explains why vectorization failed.
PassName is the name of the pass (e.g. can be AlwaysPrint). RemarkName is the identifier for the remark. If I is passed it is an instruction that prevents vectorization. Otherwise TheLoop is used for the location of the remark. If DL is passed, use it as debug location for the remark.
Returns the remark object that can be streamed to.
Definition at line 860 of file LoopVectorize.cpp.
Referenced by llvm::reportVectorizationFailure().
◆ createWidenInductionRecipes()
◆ cse()
◆ debugVectorizationMessage()
◆ determineVPlanVF()
◆ emitTransformedIndex()
Compute the transformed value of Index at offset StartValue using step StepValue.
For integer induction, returns StartValue + Index * StepValue. For pointer induction, returns StartValue[Index * StepValue]. FIXME: The newly created binary instructions should contain nsw/nuw flags, which can be found from the original scalar operations.
Definition at line 2214 of file LoopVectorize.cpp.
References assert(), B, CreateAdd(), CreateMul(), llvm::Value::getName(), llvm::BinaryOperator::getOpcode(), llvm::Value::getType(), llvm::InductionDescriptor::IK_FpInduction, llvm::InductionDescriptor::IK_IntInduction, llvm::InductionDescriptor::IK_NoInduction, llvm::InductionDescriptor::IK_PtrInduction, llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), llvm_unreachable, llvm::Offset, llvm::Value::setName(), X, and Y.
Referenced by llvm::InnerLoopVectorizer::createInductionAdditionalBypassValues(), and llvm::VPDerivedIVRecipe::execute().
◆ fixReductionScalarResumeWhenVectorizingEpilog()
Definition at line 7580 of file LoopVectorize.cpp.
References assert(), llvm::VPInstruction::ComputeReductionResult, llvm::count_if(), llvm::find_if(), llvm::VPTransformState::get(), llvm::PHINode::getIncomingValueForBlock(), llvm::RecurrenceDescriptor::getRecurrenceKind(), llvm::RecurrenceDescriptor::getRecurrenceStartValue(), llvm::RecurrenceDescriptor::getSentinelValue(), llvm::CmpInst::ICMP_EQ, llvm::CmpInst::ICMP_NE, llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind(), llvm::RecurrenceDescriptor::isFindLastIVRecurrenceKind(), llvm::PatternMatch::m_OneUse(), llvm::PatternMatch::m_Select(), llvm::PatternMatch::m_Specific(), llvm::PatternMatch::m_SpecificICmp(), llvm::PatternMatch::m_Value(), and llvm::PatternMatch::match().
Referenced by llvm::LoopVectorizationPlanner::executePlan().
◆ getAddressAccessSCEV()
◆ getDebugLocFromInstOrOperands()
Look for a meaningful debug location on the instruction or its operands.
Definition at line 820 of file LoopVectorize.cpp.
References I.
◆ getEstimatedRuntimeVF()
◆ getExpandedStep()
◆ getMaxVScale()
◆ getScalarEpilogueLowering()
Definition at line 9914 of file LoopVectorize.cpp.
References llvm::CM_ScalarEpilogueAllowed, llvm::CM_ScalarEpilogueNotAllowedOptSize, llvm::CM_ScalarEpilogueNotAllowedUsePredicate, llvm::CM_ScalarEpilogueNotNeededUsePredicate, F, llvm::LoopVectorizeHints::FK_Disabled, llvm::LoopVectorizeHints::FK_Enabled, llvm::LoopVectorizeHints::getForce(), llvm::LoopVectorizeHints::getPredicate(), llvm::IRPass, PreferPredicateTy::PredicateElseScalarEpilogue, PreferPredicateTy::PredicateOrDontVectorize, PreferPredicateOverEpilogue, llvm::TargetTransformInfo::preferPredicateOverEpilogue(), PreferPredicateTy::ScalarEpilogue, and llvm::shouldOptimizeForSize().
Referenced by llvm::LoopVectorizePass::processLoop(), and processLoopInVPlanNativePath().
◆ getSmallBestKnownTC()
◆ getVScaleForTuning()
◆ hasIrregularType()
◆ isExplicitVecOuterLoop()
Definition at line 2149 of file LoopVectorize.cpp.
References llvm::LoopVectorizeHints::allowVectorization(), assert(), llvm::dbgs(), llvm::LoopVectorizeHints::emitRemarkWithHints(), llvm::LoopVectorizeHints::FK_Undefined, llvm::LoopVectorizeHints::getForce(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::LoopVectorizeHints::getInterleave(), llvm::BasicBlock::getParent(), llvm::LoopBase< BlockT, LoopT >::isInnermost(), and LLVM_DEBUG.
Referenced by collectSupportedLoops().
◆ isIndvarOverflowCheckKnownFalse()
◆ maybeVectorizeType()
◆ planContainsAdditionalSimplifications()
Return true if the original loop TheLoop contains any instructions that do not have corresponding recipes in Plan and are not marked to be ignored in CostCtx.
This means the VPlan contains simplification that the legacy cost-model did not account for.
Definition at line 7407 of file LoopVectorize.cpp.
References llvm::any_of(), llvm::LoopBase< BlockT, LoopT >::blocks(), llvm::VPRegionBlock::getEntry(), llvm::VPlan::getVectorLoopRegion(), I, llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::insert(), IR, and llvm::vp_depth_first_deep().
Referenced by llvm::LoopVectorizationPlanner::computeBestVF().
◆ preparePlanForEpilogueVectorLoop()
Prepare Plan for vectorizing the epilogue loop.
That is, re-use expanded SCEVs from ExpandedSCEVs and set resume values for header recipes.
Definition at line 10227 of file LoopVectorize.cpp.
References llvm::all_of(), assert(), llvm::IRBuilderBase::CreateICmpEQ(), llvm::IRBuilderBase::CreateICmpNE(), llvm::IRBuilderBase::CreateSelect(), llvm::VPlan::getEntry(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::PHINode::getIncomingValueForBlock(), llvm::VPlan::getOrAddLiveIn(), getParent(), llvm::RecurrenceDescriptor::getRecurrenceKind(), llvm::RecurrenceDescriptor::getRecurrenceStartValue(), llvm::RecurrenceDescriptor::getSentinelValue(), llvm::VPlan::getTripCount(), llvm::VPlan::getVectorLoopRegion(), llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind(), llvm::RecurrenceDescriptor::isFindLastIVRecurrenceKind(), IV, llvm::make_early_inc_range(), P, llvm::predecessors(), llvm::VPValue::replaceAllUsesWith(), and llvm::VPlan::resetTripCount().
Referenced by llvm::LoopVectorizePass::processLoop().
◆ preparePlanForMainVectorLoop()
static void preparePlanForMainVectorLoop ( VPlan & MainPlan, VPlan & EpiPlan ) | static |
---|
Prepare MainPlan for vectorizing the main vector loop during epilogue vectorization.
Remove ResumePhis from MainPlan for inductions that don't have a corresponding wide induction in EpiPlan.
Definition at line 10176 of file LoopVectorize.cpp.
References llvm::any_of(), llvm::VPBasicBlock::begin(), llvm::SmallPtrSetImpl< PtrType >::contains(), llvm::VPBuilder::createNaryOp(), llvm::VPRecipeBase::eraseFromParent(), llvm::VPlan::getCanonicalIV(), llvm::VPValue::getDefiningRecipe(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPUser::getOperand(), llvm::VPlan::getScalarHeader(), llvm::VPlan::getScalarPreheader(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getVectorTripCount(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::PatternMatch::m_Specific(), llvm::PatternMatch::m_SpecificInt(), llvm::make_early_inc_range(), llvm::PatternMatch::match(), llvm::VPBasicBlock::phis(), llvm::VPlanTransforms::removeDeadRecipes(), and llvm::VPInstruction::ResumePhi.
Referenced by llvm::LoopVectorizePass::processLoop().
◆ processLoopInVPlanNativePath()
static bool processLoopInVPlanNativePath ( Loop * L, PredicatedScalarEvolution & PSE, LoopInfo * LI, DominatorTree * DT, LoopVectorizationLegality * LVL, TargetTransformInfo * TTI, TargetLibraryInfo * TLI, DemandedBits * DB, AssumptionCache * AC, OptimizationRemarkEmitter * ORE, BlockFrequencyInfo * BFI, ProfileSummaryInfo * PSI, LoopVectorizeHints & Hints, LoopVectorizationRequirements & Requirements ) | static |
---|
Definition at line 9963 of file LoopVectorize.cpp.
References assert(), llvm::LoopVectorizationCostModel::collectElementTypesForWidening(), llvm::LoopVectorizationCostModel::CostKind, llvm::dbgs(), llvm::VectorizationFactor::Disabled(), llvm::EnableVPlanNativePath, llvm::LoopVectorizationPlanner::executePlan(), F, llvm::PredicatedScalarEvolution::getBackedgeTakenCount(), llvm::LoopVectorizationLegality::getLAI(), llvm::LoopVectorizationPlanner::getPlanFor(), getScalarEpilogueLowering(), llvm::LoopVectorizeHints::getWidth(), llvm::hasBranchWeightMD(), LLVM_DEBUG, llvm::LoopVectorizationPlanner::planInVPlanNativePath(), llvm::reportVectorization(), llvm::LoopVectorizeHints::setAlreadyVectorized(), llvm::verifyFunction(), VPlanBuildStressTest, and llvm::VectorizationFactor::Width.
Referenced by llvm::LoopVectorizePass::processLoop().
◆ replaceVPBBWithIRVPBB()
◆ STATISTIC() [1/3]
STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization")
◆ STATISTIC() [2/3]
STATISTIC (LoopsEpilogueVectorized, "Number of epilogues vectorized")
◆ STATISTIC() [3/3]
STATISTIC (LoopsVectorized, "Number of loops vectorized")
◆ useActiveLaneMask()
◆ useActiveLaneMaskForControlFlow()
◆ useMaskedInterleavedAccesses()
◆ willGenerateVectors()
Check if any recipe of `Plan` will generate a vector value, which will be assigned a vector register.
Definition at line 4437 of file LoopVectorize.cpp.
References assert(), llvm::collectEphemeralRecipesForVPlan(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::contains(), llvm::VPlan::getCanonicalIV(), llvm::VPRegionBlock::getEntry(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), llvm::TargetTransformInfo::getNumberOfParts(), llvm::VPCanonicalIVPHIRecipe::getScalarType(), llvm::VPlan::getVectorLoopRegion(), llvm::VPTypeAnalysis::inferScalarType(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::insert(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable(), llvm::ElementCount::isVector(), llvm_unreachable, llvm::toVectorTy(), and llvm::vp_depth_first_shallow().
Referenced by llvm::LoopVectorizationPlanner::computeBestVF().
◆ EnableCondStoresVectorization
◆ EnableEarlyExitVectorization
cl::opt< bool > EnableEarlyExitVectorization("enable-early-exit-vectorization", cl::init(false), cl::Hidden, cl::desc( "Enable vectorization of early exit loops with uncountable exits.")) ( "enable-early-exit-vectorization" , cl::init(false) , cl::Hidden , cl::desc( "Enable vectorization of early exit loops with uncountable exits.") ) | static |
---|
◆ EnableEpilogueVectorization
cl::opt< bool > EnableEpilogueVectorization("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops.")) ( "enable-epilogue-vectorization" , cl::init(true) , cl::Hidden , cl::desc("Enable vectorization of epilogue loops.") ) | static |
---|
◆ EnableIndVarRegisterHeur
cl::opt< bool > EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")) ( "enable-ind-var-reg-heur" , cl::init(true) , cl::Hidden , cl::desc("Count the induction variable only once when interleaving") ) | static |
---|
◆ EnableInterleavedMemAccesses
cl::opt< bool > EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) ( "enable-interleaved-mem-accesses" , cl::init(false) , cl::Hidden , cl::desc("Enable vectorization on interleaved memory accesses in a loop") ) | static |
---|
◆ EnableLoadStoreRuntimeInterleave
cl::opt< bool > EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated")) ( "enable-loadstore-runtime-interleave" , cl::init(true) , cl::Hidden , cl::desc( "Enable runtime interleaving until load/store ports are saturated") ) | static |
---|
◆ EnableMaskedInterleavedMemAccesses
cl::opt< bool > EnableMaskedInterleavedMemAccesses("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop")) ( "enable-masked-interleaved-mem-accesses" , cl::init(false) , cl::Hidden , cl::desc("Enable vectorization on masked interleaved memory accesses in a loop") ) | static |
---|
An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.
Referenced by useMaskedInterleavedAccesses().
◆ EpilogueVectorizationForceVF
cl::opt< unsigned > EpilogueVectorizationForceVF("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.")) ( "epilogue-vectorization-force-VF" , cl::init(1) , cl::Hidden , cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.") ) | static |
---|
◆ EpilogueVectorizationMinVF
cl::opt< unsigned > EpilogueVectorizationMinVF("epilogue-vectorization-minimum-VF", cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.")) ( "epilogue-vectorization-minimum-VF" , cl::Hidden , cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.") ) | static |
---|
◆ ForceOrderedReductions
cl::opt< bool > ForceOrderedReductions("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")) ( "force-ordered-reductions" , cl::init(false) , cl::Hidden , cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions") ) | static |
---|
◆ ForceSafeDivisor
cl::opt< cl::boolOrDefault > ForceSafeDivisor("force-widen-divrem-via-safe-divisor", cl::Hidden, cl::desc( "Override cost based safe divisor widening for div/rem instructions")) ( "force-widen-divrem-via-safe-divisor" , cl::Hidden , cl::desc( "Override cost based safe divisor widening for div/rem instructions") ) | static |
---|
◆ ForceTailFoldingStyle
cl::opt< TailFoldingStyle > ForceTailFoldingStyle("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask."))) ( "force-tail-folding-style" , cl::desc("Force the tail folding style") , cl::init(TailFoldingStyle::None) , cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask.")) ) | static |
---|
◆ ForceTargetInstructionCost
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing.")) ( "force-target-instruction-cost" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing.") )
---|
◆ ForceTargetMaxScalarInterleaveFactor
cl::opt< unsigned > ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")) ( "force-target-max-scalar-interleave" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.") ) | static |
---|
◆ ForceTargetMaxVectorInterleaveFactor
cl::opt< unsigned > ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")) ( "force-target-max-vector-interleave" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.") ) | static |
---|
◆ ForceTargetNumScalarRegs
cl::opt< unsigned > ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) ( "force-target-num-scalar-regs" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's number of scalar registers.") ) | static |
---|
◆ ForceTargetNumVectorRegs
cl::opt< unsigned > ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) ( "force-target-num-vector-regs" , cl::init(0) , cl::Hidden , cl::desc("A flag that overrides the target's number of vector registers.") ) | static |
---|
◆ ForceTargetSupportsScalableVectors
cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc( "Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing.")) ( "force-target-supports-scalable-vectors" , cl::init(false) , cl::Hidden , cl::desc( "Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing.") ) | static |
---|
◆ LLVMLoopVectorizeFollowupAll
const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all"
◆ LLVMLoopVectorizeFollowupEpilogue
const char LLVMLoopVectorizeFollowupEpilogue[]
◆ LLVMLoopVectorizeFollowupVectorized
const char LLVMLoopVectorizeFollowupVectorized[]
◆ LoopVectorizeWithBlockFrequency
cl::opt< bool > LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")) ( "loop-vectorize-with-block-frequency" , cl::init(true) , cl::Hidden , cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.") ) | static |
---|
◆ MaximizeBandwidth
cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")) ( "vectorizer-maximize-bandwidth" , cl::init(false) , cl::Hidden , cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.") ) | static |
---|
◆ MaxNestedScalarReductionIC
cl::opt< unsigned > MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")) ( "max-nested-scalar-reduction-interleave" , cl::init(2) , cl::Hidden , cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.") ) | static |
---|
◆ MemCheckBypassWeights
constexpr uint32_t MemCheckBypassWeights[] = {1, 127} | staticconstexpr |
---|
◆ MinItersBypassWeights
constexpr uint32_t MinItersBypassWeights[] = {1, 127} | staticconstexpr |
---|
◆ NumberOfStoresToPredicate
cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) ( "vectorize-num-stores-pred" , cl::init(1) , cl::Hidden , cl::desc("Max number of stores to be predicated behind an if.") ) | static |
---|
The number of stores in a loop that are allowed to need predication.
◆ PreferInLoopReductions
cl::opt< bool > PreferInLoopReductions("prefer-inloop-reductions", cl::init(false), cl::Hidden, cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.")) ( "prefer-inloop-reductions" , cl::init(false) , cl::Hidden , cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.") ) | static |
---|
◆ PreferPredicatedReductionSelect
cl::opt< bool > PreferPredicatedReductionSelect("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc( "Prefer predicating a reduction operation over an after loop select.")) ( "prefer-predicated-reduction-select" , cl::init(false) , cl::Hidden , cl::desc( "Prefer predicating a reduction operation over an after loop select.") ) | static |
---|
◆ PreferPredicateOverEpilogue
cl::opt< PreferPredicateTy::Option > PreferPredicateOverEpilogue("prefer-predicate-over-epilogue", cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden, cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop."), cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails."))) ( "prefer-predicate-over-epilogue" , cl::init(PreferPredicateTy::ScalarEpilogue) , cl::Hidden , cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop.") , cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails.")) ) | static |
---|
◆ SCEVCheckBypassWeights
constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127} | staticconstexpr |
---|
◆ SmallLoopCost
cl::opt< unsigned > SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver.")) ( "small-loop-cost" , cl::init(20) , cl::Hidden , cl::desc( "The cost of a loop that is considered 'small' by the interleaver.") ) | static |
---|
◆ TinyTripCountVectorThreshold
cl::opt< unsigned > TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")) ( "vectorizer-min-trip-count" , cl::init(16) , cl::Hidden , cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.") ) | static |
---|
◆ UseWiderVFIfCallVariantsPresent
◆ VectorizeMemoryCheckThreshold
cl::opt< unsigned > VectorizeMemoryCheckThreshold("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks")) ( "vectorize-memory-check-threshold" , cl::init(128) , cl::Hidden , cl::desc("The maximum allowed number of runtime memory checks") ) | static |
---|
◆ VerboseDebug
◆ VPlanBuildStressTest
cl::opt< bool > VPlanBuildStressTest("vplan-build-stress-test", cl::init(false), cl::Hidden, cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).")) ( "vplan-build-stress-test" , cl::init(false) , cl::Hidden , cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).") ) | static |
---|