LLVM: lib/Transforms/Scalar/LoopInterchange.cpp File Reference (original) (raw)
Go to the source code of this file.
| Macros | |
|---|---|
| #define | DEBUG_TYPE "loop-interchange" |
| Functions | |
|---|---|
| STATISTIC (LoopsInterchanged, "Number of loops interchanged") | |
| static bool | noDuplicateRulesAndIgnore (ArrayRef< RuleTy > Rules) |
| static void | printDepMatrix (CharMatrix &DepMatrix) |
| static bool | inThisOrder (const Instruction *Src, const Instruction *Dst) |
| Return true if Src appears before Dst in the same basic block. | |
| static bool | populateDependencyMatrix (CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) |
| static void | interChangeDependencies (CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx) |
| static std::optional< bool > | isLexicographicallyPositive (ArrayRef< char > DV, unsigned Begin, unsigned End) |
| static bool | isLegalToInterChangeLoops (CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId) |
| static void | populateWorklist (Loop &L, LoopVector &LoopList) |
| static bool | hasSupportedLoopDepth (ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE) |
| static bool | isComputableLoopNest (ScalarEvolution *SE, ArrayRef< Loop * > LoopList) |
| static Value * | followLCSSA (Value *SV) |
| static PHINode * | findInnerReductionPhi (Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts) |
| static bool | areInnerLoopExitPHIsSupported (Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions) |
| static bool | areOuterLoopExitPHIsSupported (Loop *OuterLoop, Loop *InnerLoop) |
| static bool | areInnerLoopLatchPHIsSupported (Loop *OuterLoop, Loop *InnerLoop) |
| static bool | canVectorize (const CharMatrix &DepMatrix, unsigned LoopId) |
| Return true if we can vectorize the loop specified by LoopId. | |
| static void | moveBBContents (BasicBlock *FromBB, Instruction *InsertBefore) |
| Move all instructions except the terminator from FromBB right before InsertBefore. | |
| static void | swapBBContents (BasicBlock *BB1, BasicBlock *BB2) |
| Swap instructions between BB1 and BB2 but keep terminators intact. | |
| static void | updateSuccessor (BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true) |
| static void | moveLCSSAPhis (BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI) |
| static void | simplifyLCSSAPhis (Loop *OuterLoop, Loop *InnerLoop) |
| This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch block does not need to be exit block of the inner loop. |
| Variables | |
|---|---|
| static cl::opt< int > | LoopInterchangeCostThreshold ("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number")) |
| static cl::opt< unsigned int > | MaxMemInstrCount ("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time")) |
| static cl::opt< unsigned int > | MinLoopNestDepth ("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform")) |
| static cl::opt< unsigned int > | MaxLoopNestDepth ("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform")) |
| static cl::list< RuleTy > | Profitabilities ("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)"))) |
◆ DEBUG_TYPE
#define DEBUG_TYPE "loop-interchange"
◆ areInnerLoopExitPHIsSupported()
◆ areInnerLoopLatchPHIsSupported()
| bool areInnerLoopLatchPHIsSupported ( Loop * OuterLoop, Loop * InnerLoop ) | static |
|---|
◆ areOuterLoopExitPHIsSupported()
| bool areOuterLoopExitPHIsSupported ( Loop * OuterLoop, Loop * InnerLoop ) | static |
|---|
◆ canVectorize()
◆ findInnerReductionPhi()
Definition at line 903 of file LoopInterchange.cpp.
References AbstractManglingParser< Derived, Alloc >::Ops, llvm::Add, llvm::And, llvm::AnyOf, assert(), llvm::dyn_cast(), llvm::FAdd, llvm::FMax, llvm::FMaximum, llvm::FMaximumNum, llvm::FMin, llvm::FMinimum, llvm::FMinimumNum, llvm::FMul, llvm::FMulAdd, llvm::RecurrenceDescriptor::getExactFPMathInst(), llvm::RecurrenceDescriptor::getOpcode(), llvm::RecurrenceDescriptor::getRecurrenceKind(), llvm::RecurrenceDescriptor::getReductionOpChain(), I, llvm::isa(), llvm::RecurrenceDescriptor::isReductionPHI(), llvm::Mul, llvm::Or, PHI, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SMax, llvm::SMin, llvm::UMax, llvm::UMin, llvm::Value::users(), and llvm::Xor.
◆ followLCSSA()
◆ hasSupportedLoopDepth()
Definition at line 402 of file LoopInterchange.cpp.
References llvm::dbgs(), DEBUG_TYPE, llvm::OptimizationRemarkEmitter::emit(), llvm::ArrayRef< T >::front(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::Loop::getStartLoc(), LLVM_DEBUG, MaxLoopNestDepth, MinLoopNestDepth, and llvm::ArrayRef< T >::size().
Referenced by llvm::LoopInterchangePass::run().
◆ interChangeDependencies()
| void interChangeDependencies ( CharMatrix & DepMatrix, unsigned FromIndx, unsigned ToIndx ) | static |
|---|
◆ inThisOrder()
◆ isComputableLoopNest()
◆ isLegalToInterChangeLoops()
| bool isLegalToInterChangeLoops ( CharMatrix & DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId ) | static |
|---|
◆ isLexicographicallyPositive()
◆ moveBBContents()
◆ moveLCSSAPhis()
Definition at line 1800 of file LoopInterchange.cpp.
References llvm::PHINode::addIncoming(), llvm::all_of(), assert(), llvm::cast(), llvm::dyn_cast(), followLCSSA(), llvm::BasicBlock::getFirstNonPHIIt(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::BasicBlock::getParent(), I, llvm::Instruction::insertBefore(), llvm::make_early_inc_range(), llvm::make_pointer_range(), P, llvm::BasicBlock::phis(), llvm::predecessors(), llvm::BasicBlock::replacePhiUsesWith(), llvm::PHINode::setIncomingBlock(), and llvm::PHINode::setIncomingValue().
◆ noDuplicateRulesAndIgnore()
| bool noDuplicateRulesAndIgnore ( ArrayRef< RuleTy > Rules) | static |
|---|
◆ populateDependencyMatrix()
Definition at line 166 of file LoopInterchange.cpp.
References llvm::all_of(), assert(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::CallingConv::C, llvm::cast(), D(), llvm::dbgs(), DEBUG_TYPE, llvm::DependenceInfo::depends(), llvm::dyn_cast(), llvm::OptimizationRemarkEmitter::emit(), llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::Dependence::DVEntry::EQ, llvm::Dependence::DVEntry::GT, I, II, inThisOrder(), llvm::isa(), LLVM_DEBUG, llvm::Dependence::DVEntry::LT, MaxMemInstrCount, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorTemplateCommon< T, typename >::size(), llvm::StringRef::size(), and llvm::StringMap< ValueTy, AllocatorTy >::try_emplace().
◆ populateWorklist()
◆ printDepMatrix()
| void printDepMatrix ( CharMatrix & DepMatrix) | static |
|---|
◆ simplifyLCSSAPhis()
| void simplifyLCSSAPhis ( Loop * OuterLoop, Loop * InnerLoop ) | static |
|---|
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch block does not need to be exit block of the inner loop.
Consider a loop that was in LCSSA form, but then some transformation like loop-unswitch comes along and creates an empty block, where BB5 in this example is the outer loop latch block:
BB4: br label BB5 BB5: old.cond.lcssa = phi i16 [ cond, BB4 ] br outer.header
Interchange then brings it in LCSSA form again resulting in this chain of single-input phi nodes:
BB4: new.cond.lcssa = phi i16 [ cond, BB3 ] br label BB5 BB5: old.cond.lcssa = phi i16 [ new.cond.lcssa, BB4 ]
The problem is that interchange can reoder blocks BB4 and BB5 placing the use before the def if we don't check this. The solution is to simplify lcssa phi nodes (remove) if they appear in non-exit blocks.
Definition at line 1919 of file LoopInterchange.cpp.
References assert(), llvm::dbgs(), llvm::LoopBase< BlockT, LoopT >::getExitBlock(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), LLVM_DEBUG, llvm::make_pointer_range(), and llvm::BasicBlock::phis().
◆ STATISTIC()
| STATISTIC | ( | LoopsInterchanged | , |
|---|---|---|---|
| "Number of loops interchanged" | ) |
◆ swapBBContents()
◆ updateSuccessor()
◆ LoopInterchangeCostThreshold
| cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number")) ( "loop-interchange-threshold" , cl::init(0) , cl::Hidden , cl::desc("Interchange if you gain more than this number") ) | static |
|---|
◆ MaxLoopNestDepth
| cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform")) ( "loop-interchange-max-loop-nest-depth" , cl::init(10) , cl::Hidden , cl::desc("Maximum depth of loop nest considered for the transform") ) | static |
|---|
◆ MaxMemInstrCount
| cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc( "Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time")) ( "loop-interchange-max-meminstr-count" , cl::init(64) , cl::Hidden , cl::desc( "Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time") ) | static |
|---|
◆ MinLoopNestDepth
| cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform")) ( "loop-interchange-min-loop-nest-depth" , cl::init(2) , cl::Hidden , cl::desc("Minimum depth of loop nest considered for the transform") ) | static |
|---|
◆ Profitabilities
| cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)"))) ( "loop-interchange-profitabilities" , cl::ZeroOrMore , cl::MiscFlags::CommaSeparated , cl::Hidden , cl::desc("List of profitability heuristics to be used. They are applied in " "the given order") , cl::list_init< RuleTy > {RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}, cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")) ) | static |
|---|