LLVM: lib/CodeGen/HardwareLoops.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
44
45#define DEBUG_TYPE "hardware-loops"
46
47#define HW_LOOPS_NAME "Hardware Loop Insertion"
48
49using namespace llvm;
50
53 cl::desc("Force hardware loops intrinsics to be inserted"));
54
58 cl::desc("Force hardware loop counter to be updated through a phi"));
59
62 cl::desc("Force allowance of nested hardware loops"));
63
66 cl::desc("Set the loop decrement value"));
67
70 cl::desc("Set the loop counter bitwidth"));
71
75 cl::desc("Force generation of loop guard intrinsic"));
76
77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
78
79#ifndef NDEBUG
82 dbgs() << "HWLoops: " << DebugMsg;
83 if (I)
85 else
86 dbgs() << '.';
87 dbgs() << '\n';
88}
89#endif
90
93 BasicBlock *CodeRegion = L->getHeader();
95
96 if (I) {
97 CodeRegion = I->getParent();
98
99
100 if (I->getDebugLoc())
102 }
103
105 R << "hardware-loop not created: ";
106 return R;
107}
108
109namespace {
110
111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
115 }
116
118
// Legacy pass-manager wrapper. It derives from FunctionPass and registers
// itself through the static ID member; runOnFunction(), defined out-of-line
// later in this file, collects the analyses and hands them to
// HardwareLoopsImpl.
// NOTE(review): source lines 124, 127, 130-131 and 133 are absent from this
// listing, and the template arguments of every addRequired/addPreserved call
// were stripped by the extraction -- restore them from upstream before
// relying on this text.
119 class HardwareLoopsLegacy : public FunctionPass {
120 public:
// Pass identifier; its address is what the legacy pass manager keys on.
121 static char ID;
122
123 HardwareLoopsLegacy() : FunctionPass(ID) {
125 }
126
128
// Declares which analyses this pass requires and which ones it preserves.
129 void getAnalysisUsage(AnalysisUsage &AU) const override {
132 AU.addRequired();
134 AU.addRequired();
135 AU.addPreserved();
136 AU.addRequired();
137 AU.addRequired();
138 AU.addRequired();
139 AU.addPreserved();
140 }
141 };
142
// Pass-manager-independent implementation of the transformation. It stores
// the analyses and options handed to its constructor and is driven through
// run(Function &), which is defined out-of-line later in this file.
// NOTE(review): source line 153 is absent from this listing; the in-class
// declaration of run() presumably lives there -- confirm against upstream.
143 class HardwareLoopsImpl {
144 public:
// Keeps the supplied analyses as references/pointers and PreserveLCSSA by
// value. TLI may be null: the legacy driver passes nullptr when the
// library-info analysis is not available.
145 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
146 DominatorTree &DT, const DataLayout &DL,
147 const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
148 AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
149 HardwareLoopOptions &Opts)
150 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
151 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
152
154
155 private:
156
// Recursive entry point: the sub-loops of L are attempted before L itself.
// A true result tells the caller not to convert the enclosing loop.
157 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
158
159
160
// Second stage, used once a HardwareLoopInfo has been populated for the
// loop. Returns false when the loop is rejected or has no preheader, and
// true once HardwareLoop::Create() has been invoked.
161 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
162
163 ScalarEvolution &SE;
164 LoopInfo &LI;
165 bool PreserveLCSSA;
166 DominatorTree &DT;
167 const DataLayout &DL;
168 const TargetTransformInfo &TTI;
169 TargetLibraryInfo *TLI = nullptr;
170 AssumptionCache &AC;
171 OptimizationRemarkEmitter *ORE;
172 HardwareLoopOptions &Opts;
// Set as soon as any loop has been converted; this is the value run() returns.
173 bool MadeChange = false;
174 };
175
// Helper that performs the IR rewrite for one loop. All state is copied out
// of the HardwareLoopInfo given to the constructor; Create() is the only
// public operation.
// NOTE(review): source lines 187, 219 and 226 are absent from this listing.
// Create() calls InsertLoopRegDec(), the constructor initialises a member M,
// and the out-of-line code uses a BeginBB member, so those declarations
// presumably sit on the missing lines -- confirm against upstream.
176 class HardwareLoop {
177
// Expands ExitCount into an IR value of CountType. Yields nullptr when the
// expression is not safe to expand at the chosen insertion block.
178 Value *InitLoopCount();
179
180
// Emits the set/start (or test.set/test.start) loop-iterations intrinsic for
// LoopCountInit. Returns LoopCountInit itself when no PHI counter is used,
// otherwise the counter value produced by the intrinsic.
181 Value *InsertIterationSetup(Value *LoopCountInit);
182
183
// Non-PHI path: emits the loop_decrement intrinsic at the exit branch.
184 void InsertLoopDec();
185
186
188
189
190
191
// Creates a two-input PHI in the loop header: NumElts flows in from the
// preheader and EltsRem from the latch.
192 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
193
194
195
// Builds an "EltsRem != 0" comparison for the exit branch.
196 void UpdateBranch(Value *EltsRem);
197
198 public:
// Snapshots the relevant fields of Info. Note the renames: CounterInReg
// becomes UsePHICounter and PerformEntryTest becomes UseLoopGuard.
199 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
200 const DataLayout &DL,
201 OptimizationRemarkEmitter *ORE,
202 HardwareLoopOptions &Opts) :
203 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
204 ExitCount(Info.ExitCount),
205 CountType(Info.CountType),
206 ExitBranch(Info.ExitBranch),
207 LoopDecrement(Info.LoopDecrement),
208 UsePHICounter(Info.CounterInReg),
209 UseLoopGuard(Info.PerformEntryTest) { }
210
// Runs the whole rewrite. Returns nothing: if the loop count cannot be
// materialised it emits a remark and leaves the loop untouched.
211 void Create();
212
213 private:
214 ScalarEvolution &SE;
215 const DataLayout &DL;
216 OptimizationRemarkEmitter *ORE = nullptr;
217 HardwareLoopOptions &Opts;
218 Loop *L = nullptr;
220 const SCEV *ExitCount = nullptr;
221 Type *CountType = nullptr;
222 BranchInst *ExitBranch = nullptr;
223 Value *LoopDecrement = nullptr;
224 bool UsePHICounter = false;
// May be overwritten by InitLoopCount() once it has inspected the CFG.
225 bool UseLoopGuard = false;
227 };
228}
229
230char HardwareLoopsLegacy::ID = 0;
231
// Legacy pass-manager entry point: gathers the analyses, builds the option
// set and delegates to HardwareLoopsImpl::run(). Returns whatever run()
// reports, or false when the function is skipped.
// NOTE(review): the template arguments of getAnalysis/getAnalysisIfAvailable
// were stripped by the extraction, and source lines 250-261 (which presumably
// populate Opts from the command-line flags) are absent -- restore both from
// upstream.
232bool HardwareLoopsLegacy::runOnFunction(Function &F) {
233 if (skipFunction(F))
234 return false;
235
236 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
237
238 auto &LI = getAnalysis().getLoopInfo();
239 auto &SE = getAnalysis().getSE();
240 auto &DT = getAnalysis().getDomTree();
241 auto &TTI = getAnalysis().getTTI(F);
242 auto &DL = F.getDataLayout();
243 auto *ORE = &getAnalysis().getORE();
// Library info is optional here: TLI ends up null when it is unavailable.
244 auto *TLIP = getAnalysisIfAvailable();
245 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
246 auto &AC = getAnalysis().getAssumptionCache(F);
// LCSSA is only maintained if the pass manager asks for it to be preserved.
247 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
248
249 HardwareLoopOptions Opts;
262
263 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
264 Opts);
265 return Impl.run(F);
266}
267
277 auto &DL = F.getDataLayout();
278
279 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
283
289 return PA;
290}
291
// Visits every outermost loop known to LoopInfo and tries to convert it;
// inner loops are reached through the recursion inside TryConvertLoop().
// Returns true if at least one loop was rewritten.
// NOTE(review): source line 293, which presumably declares Ctx, is absent
// from this listing -- confirm against upstream.
292bool HardwareLoopsImpl::run(Function &F) {
294 for (Loop *L : LI)
295 if (L->isOutermost())
// The per-loop result is deliberately ignored; MadeChange carries the answer.
296 TryConvertLoop(L, Ctx);
297 return MadeChange;
298}
299
300
301
// Tries to turn L into a hardware loop after first recursing into its
// sub-loops. The return value is a "stop" signal for the caller: true means
// the enclosing loop must not be converted.
// NOTE(review): source lines 323, 331 and 334 are absent from this listing,
// so three statements below appear truncated.
302bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
303
// Inner loops go first. If any of them asks to stop, L is reported as an
// unsupported nested hardware loop and the stop signal is propagated upward.
304 bool AnyChanged = false;
305 for (Loop *SL : *L)
306 AnyChanged |= TryConvertLoop(SL, Ctx);
307 if (AnyChanged) {
308 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
309 ORE, L);
310 return true;
311 }
312
313 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
314
315 HardwareLoopInfo HWLoopInfo(L);
316 if (!HWLoopInfo.canAnalyze(LI)) {
317 reportHWLoopFailure("cannot analyze loop, irreducible control flow",
318 "HWLoopCannotAnalyze", ORE, L);
319 return false;
320 }
321
// The second operand of this condition is on the missing line 323; judging by
// the remark text it is presumably the target's profitability query.
// NOTE(review): Opts.Force appears to be a std::optional<bool>, so
// `!Opts.Force` tests whether a value was set, not the value itself --
// confirm this is intended.
322 if (!Opts.Force &&
324 reportHWLoopFailure("it's not profitable to create a hardware-loop",
325 "HWLoopNotProfitable", ORE, L);
326 return false;
327 }
328
329
// Option overrides for the counter width and the decrement amount. The body
// of the first `if` and the guard of the second assignment sit on the missing
// lines 331 and 334.
330 if (Opts.Bitwidth.has_value()) {
332 }
333
335 HWLoopInfo.LoopDecrement =
336 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
337
// MadeChange is the object-wide flag, so the result below is true once any
// loop visited so far has been converted, not only L. The stop signal is
// suppressed when nesting is legal for the target or ForceNested is engaged.
// NOTE(review): as with Force above, `!Opts.ForceNested` checks optional
// engagement rather than the stored bool -- confirm.
338 MadeChange |= TryConvertLoop(HWLoopInfo);
339 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
340}
341
// Second conversion stage for a loop whose HardwareLoopInfo is already
// populated. Returns false when the loop is not a candidate or no preheader
// is available; otherwise builds a HardwareLoop, runs Create() and returns
// true.
// NOTE(review): source lines 344, 347-348, 356-357 and 364 are absent from
// this listing. They presumably bind the local L, perform the candidate
// check, open the assert and try to insert a missing preheader -- confirm
// against upstream.
342bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
343
345 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
346
349
350
351
352 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
353 return false;
354 }
355
358 "Hardware Loop must have set exit info.");
359
360 BasicBlock *Preheader = L->getLoopPreheader();
361
362
// Two consecutive null checks: the statement between them (missing line 364)
// presumably reassigns Preheader, and the second check bails out if that
// also failed.
363 if (!Preheader)
365 if (!Preheader)
366 return false;
367
368 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
// NOTE(review): Create() returns void and can bail out early after emitting a
// remark, yet the statistic is bumped and true is returned regardless.
369 HWLoop.Create();
370 ++NumHWLoops;
371 return true;
372}
373
// Performs the rewrite for this loop: materialise the trip count, emit the
// setup intrinsic, then wire up the per-iteration decrement either through a
// PHI-carried counter or through the plain loop_decrement form.
// NOTE(review): source lines 389 and 397 are absent from this listing, so the
// PHI path and the final per-block loop below appear truncated.
374void HardwareLoop::Create() {
375 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
376
// A null count means the expression could not be expanded safely; report it
// and leave the loop untouched.
377 Value *LoopCountInit = InitLoopCount();
378 if (!LoopCountInit) {
379 reportHWLoopFailure("could not safely create a loop count expression",
380 "HWLoopNotSafe", ORE, L);
381 return;
382 }
383
384 Value *Setup = InsertIterationSetup(LoopCountInit);
385
// PHI path: the decrement is created first, then a header PHI that merges the
// setup value with the decremented value, and the exit branch is rewritten to
// test the decrement result. EltsRem is consumed on the missing line 389.
386 if (UsePHICounter || Opts.ForcePhi) {
387 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
388 Value *EltsRem = InsertPHICounter(Setup, LoopDec);
390 UpdateBranch(LoopDec);
391 } else
392 InsertLoopDec();
393
394
395
// Post-processing over every block of the loop; the loop body is on the
// missing line 397.
396 for (auto *I : L->blocks())
398}
399
401 BasicBlock *Preheader = L->getLoopPreheader();
403 return false;
404
407 return false;
408
410 if (BI->isUnconditional() || (BI->getCondition()))
411 return false;
412
413
414
416 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
417 if (!ICmp->isEquality())
418 return false;
419
423 return false;
424 };
425
426
427 Value *CountBefZext =
429
430 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
431 !IsCompareZero(ICmp, CountBefZext, 0) &&
432 !IsCompareZero(ICmp, CountBefZext, 1))
433 return false;
434
436 if (BI->getSuccessor(SuccIdx) != Preheader)
437 return false;
438
439 return true;
440}
441
442Value *HardwareLoop::InitLoopCount() {
443 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
444
445
446
447 SCEVExpander SCEVE(SE, DL, "loopcnt");
449 ExitCount->getType() != CountType)
451
453
454
455
456
457
460 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
462 UseLoopGuard = true;
463 } else
464 UseLoopGuard = false;
465
470
471
472 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
473 UseLoopGuard = false;
474 else
475 BB = Predecessor;
476 }
477
478 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
479 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
480 << *ExitCount << "\n");
481 return nullptr;
482 }
483
484 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
486
487
488
489
490
491
492
493
495 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
497 << " - Expanded Count in " << BB->getName() << "\n"
498 << " - Will insert set counter intrinsic into: "
499 << BeginBB->getName() << "\n");
501}
502
// Emits the intrinsic that seeds the hardware loop counter with LoopCountInit
// and, when a loop guard is in use, makes the guarding branch depend on that
// intrinsic's result. Returns LoopCountInit when no PHI counter is used,
// otherwise the counter value produced by the intrinsic.
// NOTE(review): source lines 504-505, 507, 509, 518-519 and 524 are absent
// from this listing. The declarations of Builder, Ty, ID and LoopGuard, the
// condition guarding setIsFPConstrained, and the opening of the assert
// presumably sit there -- confirm against upstream.
503Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
506 Builder.setIsFPConstrained(true);
508 bool UsePhi = UsePHICounter || Opts.ForcePhi;
// Four variants: the "test_" forms on the first arm and the plain forms on the
// second, each split into start_ (PHI counter) and set_ (no PHI). The arm
// selector is on the missing line 509.
510 ? (UsePhi ? Intrinsic::test_start_loop_iterations
511 : Intrinsic::test_set_loop_iterations)
512 : (UsePhi ? Intrinsic::start_loop_iterations
513 : Intrinsic::set_loop_iterations);
514 Value *LoopSetup = Builder.CreateIntrinsic(ID, Ty, LoopCountInit);
515
516
517 if (UseLoopGuard) {
520 "Expected conditional branch");
521
// With a PHI counter the intrinsic result is an aggregate and element 1 is
// used as the branch condition; without one the result is used directly.
522 Value *SetCount =
523 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
// Successor 0 must be the loop preheader once the new condition is in place.
525 LoopGuard->setCondition(SetCount);
526 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
527 LoopGuard->swapSuccessors();
528 }
529 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
530 << "\n");
// In the guarded PHI form, element 0 of the aggregate is the counter value.
531 if (UsePhi && UseLoopGuard)
532 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
533 return !UsePhi ? LoopCountInit : LoopSetup;
534}
535
536void HardwareLoop::InsertLoopDec() {
538 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
539 Attribute::StrictFP))
540 CondBuilder.setIsFPConstrained(true);
541
543 Value *NewCond = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement,
547
548
551
552
553
555
556 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
557}
558
561 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
562 Attribute::StrictFP))
563 CondBuilder.setIsFPConstrained(true);
564
566 Value *Call = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement_reg,
568
569 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
571}
572
// Creates the PHI that carries the remaining-iterations counter around the
// loop. It is inserted at the first non-PHI position of the header, takes
// NumElts on the edge from the preheader and EltsRem on the edge from the
// latch.
// NOTE(review): source lines 575-576 and 582 are absent from this listing;
// they presumably declare Header and Latch and return Index -- confirm
// against upstream.
573PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
574 BasicBlock *Preheader = L->getLoopPreheader();
577 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
// Two incoming edges are reserved up front: preheader and latch.
578 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
579 Index->addIncoming(NumElts, Preheader);
580 Index->addIncoming(EltsRem, Latch);
581 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
583}
584
585void HardwareLoop::UpdateBranch(Value *EltsRem) {
588 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
591
592
595
596
597
599}
600
607
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
#define HW_LOOPS_NAME
Definition HardwareLoops.cpp:47
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
Definition HardwareLoops.cpp:92
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
Definition HardwareLoops.cpp:80
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
static bool CanGenerateTest(Loop *L, Value *Count)
Definition HardwareLoops.cpp:400
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Defines an IR pass for the creation of hardware loops.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Machine Check Debug Module
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Predicate getPredicate() const
Return the predicate for this instruction.
Analysis pass which computes a DominatorTree.
FunctionPass class - This class is used to implement most global optimizations.
AttributeList getAttributes() const
Return the attribute list for this Function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition HardwareLoops.cpp:268
This instruction compares its operands according to the predicate given to the constructor.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
Analysis pass that exposes the LoopInfo for a function.
Represents a single loop in the control flow graph.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
bool isPointerTy() const
True if this is an instance of PointerType.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
const ParentTy * getParent() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void initializeHardwareLoopsLegacyPass(PassRegistry &)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createHardwareLoopsLegacyPass()
Create Hardware Loop pass.
Definition HardwareLoops.cpp:608
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
std::optional< bool > Force
HardwareLoopOptions & setForceNested(bool Force)
std::optional< bool > ForceGuard
std::optional< unsigned > Decrement
HardwareLoopOptions & setDecrement(unsigned Count)
HardwareLoopOptions & setForceGuard(bool Force)
HardwareLoopOptions & setForce(bool Force)
HardwareLoopOptions & setCounterBitwidth(unsigned Width)
std::optional< unsigned > Bitwidth
HardwareLoopOptions & setForcePhi(bool Force)
std::optional< bool > ForcePhi
std::optional< bool > ForceNested
bool getForceNested() const