LLVM: lib/CodeGen/HardwareLoops.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

44

45#define DEBUG_TYPE "hardware-loops"

46

47#define HW_LOOPS_NAME "Hardware Loop Insertion"

48

49using namespace llvm;

50

53 cl::desc("Force hardware loops intrinsics to be inserted"));

54

58 cl::desc("Force hardware loop counter to be updated through a phi"));

59

62 cl::desc("Force allowance of nested hardware loops"));

63

66 cl::desc("Set the loop decrement value"));

67

70 cl::desc("Set the loop counter bitwidth"));

71

75 cl::desc("Force generation of loop guard intrinsic"));

76

77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");

78

79#ifndef NDEBUG

82 dbgs() << "HWLoops: " << DebugMsg;

83 if (I)

84 dbgs() << ' ' << *I;

85 else

86 dbgs() << '.';

87 dbgs() << '\n';

88}

89#endif

90

93 BasicBlock *CodeRegion = L->getHeader();

95

96 if (I) {

97 CodeRegion = I->getParent();

98

99

100 if (I->getDebugLoc())

101 DL = I->getDebugLoc();

102 }

103

105 R << "hardware-loop not created: ";

106 return R;

107}

108

109namespace {

110

111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,

115 }

116

118

119 class HardwareLoopsLegacy : public FunctionPass {

120 public:

121 static char ID;

122

123 HardwareLoopsLegacy() : FunctionPass(ID) {

125 }

126

128

129 void getAnalysisUsage(AnalysisUsage &AU) const override {

132 AU.addRequired();

134 AU.addRequired();

135 AU.addPreserved();

136 AU.addRequired();

137 AU.addRequired();

138 AU.addRequired();

139 AU.addPreserved();

140 }

141 };

142

143 class HardwareLoopsImpl {

144 public:

145 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,

146 DominatorTree &DT, const TargetTransformInfo &TTI,

147 TargetLibraryInfo *TLI, AssumptionCache &AC,

148 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)

149 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), TTI(TTI),

150 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) {}

151

152 bool run(Function &F);

153

154 private:

155

156 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);

157

158

159

160 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);

161

162 ScalarEvolution &SE;

163 LoopInfo &LI;

164 bool PreserveLCSSA;

165 DominatorTree &DT;

166 const TargetTransformInfo &TTI;

167 TargetLibraryInfo *TLI = nullptr;

168 AssumptionCache &AC;

169 OptimizationRemarkEmitter *ORE;

170 HardwareLoopOptions &Opts;

171 bool MadeChange = false;

172 };

173

174 class HardwareLoop {

175

176 Value *InitLoopCount();

177

178

179 Value *InsertIterationSetup(Value *LoopCountInit);

180

181

182 void InsertLoopDec();

183

184

186

187

188

189

190 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);

191

192

193

194 void UpdateBranch(Value *EltsRem);

195

196 public:

197 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,

198 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)

199 : SE(SE), ORE(ORE), Opts(Opts), L(Info.L),

200 M(L->getHeader()->getModule()), ExitCount(Info.ExitCount),

201 CountType(Info.CountType), ExitBranch(Info.ExitBranch),

202 LoopDecrement(Info.LoopDecrement), UsePHICounter(Info.CounterInReg),

203 UseLoopGuard(Info.PerformEntryTest) {}

204

205 void Create();

206

207 private:

208 ScalarEvolution &SE;

209 OptimizationRemarkEmitter *ORE = nullptr;

210 HardwareLoopOptions &Opts;

211 Loop *L = nullptr;

213 const SCEV *ExitCount = nullptr;

214 Type *CountType = nullptr;

215 BranchInst *ExitBranch = nullptr;

216 Value *LoopDecrement = nullptr;

217 bool UsePHICounter = false;

218 bool UseLoopGuard = false;

220 };

221}

222

223char HardwareLoopsLegacy::ID = 0;

224

225bool HardwareLoopsLegacy::runOnFunction(Function &F) {

226 if (skipFunction(F))

227 return false;

228

229 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");

230

231 auto &LI = getAnalysis().getLoopInfo();

232 auto &SE = getAnalysis().getSE();

233 auto &DT = getAnalysis().getDomTree();

234 auto &TTI = getAnalysis().getTTI(F);

235 auto *ORE = &getAnalysis().getORE();

236 auto *TLIP = getAnalysisIfAvailable();

237 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;

238 auto &AC = getAnalysis().getAssumptionCache(F);

239 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

240

241 HardwareLoopOptions Opts;

254

255 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, TTI, TLI, AC, ORE, Opts);

256 return Impl.run(F);

257}

258

268

269 HardwareLoopsImpl Impl(SE, LI, true, DT, TTI, TLI, AC, ORE, Opts);

273

279 return PA;

280}

281

282bool HardwareLoopsImpl::run(Function &F) {

284 for (Loop *L : LI)

285 if (L->isOutermost())

286 TryConvertLoop(L, Ctx);

287 return MadeChange;

288}

289

290

291

292bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {

293

294 bool AnyChanged = false;

295 for (Loop *SL : *L)

296 AnyChanged |= TryConvertLoop(SL, Ctx);

297 if (AnyChanged) {

298 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",

299 ORE, L);

300 return true;

301 }

302

303 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");

304

305 HardwareLoopInfo HWLoopInfo(L);

306 if (!HWLoopInfo.canAnalyze(LI)) {

307 reportHWLoopFailure("cannot analyze loop, irreducible control flow",

308 "HWLoopCannotAnalyze", ORE, L);

309 return false;

310 }

311

312 if (!Opts.Force &&

314 reportHWLoopFailure("it's not profitable to create a hardware-loop",

315 "HWLoopNotProfitable", ORE, L);

316 return false;

317 }

318

319

320 if (Opts.Bitwidth.has_value()) {

322 }

323

325 HWLoopInfo.LoopDecrement =

326 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());

327

328 MadeChange |= TryConvertLoop(HWLoopInfo);

329 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);

330}

331

332bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {

333

334 Loop *L = HWLoopInfo.L;

335 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);

336

339

340

341

342 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);

343 return false;

344 }

345

348 "Hardware Loop must have set exit info.");

349

350 BasicBlock *Preheader = L->getLoopPreheader();

351

352

353 if (!Preheader)

355 if (!Preheader)

356 return false;

357

358 HardwareLoop HWLoop(HWLoopInfo, SE, ORE, Opts);

359 HWLoop.Create();

360 ++NumHWLoops;

361 return true;

362}

363

364void HardwareLoop::Create() {

365 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");

366

367 Value *LoopCountInit = InitLoopCount();

368 if (!LoopCountInit) {

369 reportHWLoopFailure("could not safely create a loop count expression",

370 "HWLoopNotSafe", ORE, L);

371 return;

372 }

373

374 Value *Setup = InsertIterationSetup(LoopCountInit);

375

376 if (UsePHICounter || Opts.ForcePhi) {

377 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);

378 Value *EltsRem = InsertPHICounter(Setup, LoopDec);

380 UpdateBranch(LoopDec);

381 } else

382 InsertLoopDec();

383

384

385

386 for (auto *I : L->blocks())

388}

389

391 BasicBlock *Preheader = L->getLoopPreheader();

393 return false;

394

397 return false;

398

400 if (BI->isUnconditional() || isa<ICmpInst>(BI->getCondition()))

401 return false;

402

403

404

406 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");

407 if (!ICmp->isEquality())

408 return false;

409

413 return false;

414 };

415

416

417 Value *CountBefZext =

419

420 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&

421 !IsCompareZero(ICmp, CountBefZext, 0) &&

422 !IsCompareZero(ICmp, CountBefZext, 1))

423 return false;

424

426 if (BI->getSuccessor(SuccIdx) != Preheader)

427 return false;

428

429 return true;

430}

431

432Value *HardwareLoop::InitLoopCount() {

433 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");

434

435

436

437 SCEVExpander SCEVE(SE, "loopcnt");

439 ExitCount->getType() != CountType)

441

443

444

445

446

447

450 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");

452 UseLoopGuard = true;

453 } else

454 UseLoopGuard = false;

455

460

461

462 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))

463 UseLoopGuard = false;

464 else

465 BB = Predecessor;

466 }

467

468 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {

469 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "

470 << *ExitCount << "\n");

471 return nullptr;

472 }

473

474 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,

476

477

478

479

480

481

482

483

485 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();

487 << " - Expanded Count in " << BB->getName() << "\n"

488 << " - Will insert set counter intrinsic into: "

489 << BeginBB->getName() << "\n");

491}

492

493Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {

496 Builder.setIsFPConstrained(true);

498 bool UsePhi = UsePHICounter || Opts.ForcePhi;

500 ? (UsePhi ? Intrinsic::test_start_loop_iterations

501 : Intrinsic::test_set_loop_iterations)

502 : (UsePhi ? Intrinsic::start_loop_iterations

503 : Intrinsic::set_loop_iterations);

504 Value *LoopSetup = Builder.CreateIntrinsic(ID, Ty, LoopCountInit);

505

506

507 if (UseLoopGuard) {

510 "Expected conditional branch");

511

512 Value *SetCount =

513 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;

515 LoopGuard->setCondition(SetCount);

516 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())

517 LoopGuard->swapSuccessors();

518 }

519 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup

520 << "\n");

521 if (UsePhi && UseLoopGuard)

522 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);

523 return !UsePhi ? LoopCountInit : LoopSetup;

524}

525

526void HardwareLoop::InsertLoopDec() {

528 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(

529 Attribute::StrictFP))

530 CondBuilder.setIsFPConstrained(true);

531

533 Value *NewCond = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement,

537

538

541

542

543

545

546 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");

547}

548

551 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(

552 Attribute::StrictFP))

553 CondBuilder.setIsFPConstrained(true);

554

556 Value *Call = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement_reg,

558

559 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");

561}

562

563PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {

564 BasicBlock *Preheader = L->getLoopPreheader();

567 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());

568 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);

569 Index->addIncoming(NumElts, Preheader);

570 Index->addIncoming(EltsRem, Latch);

571 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");

573}

574

575void HardwareLoop::UpdateBranch(Value *EltsRem) {

578 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));

581

582

585

586

587

589}

590

597

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Analysis containing CSE Info

This file contains the declarations for the subclasses of Constant, which represent the different fla...

static bool runOnFunction(Function &F, bool PostInlining)

static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))

#define HW_LOOPS_NAME

Definition HardwareLoops.cpp:47

static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))

static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)

Definition HardwareLoops.cpp:92

static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))

static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)

Definition HardwareLoops.cpp:80

static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))

static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))

static bool CanGenerateTest(Loop *L, Value *Count)

Definition HardwareLoops.cpp:390

static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))

Defines an IR pass for the creation of hardware loops.

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

Machine Check Debug Module

MachineInstr unsigned OpIdx

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This pass exposes codegen information to IR-level passes.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

A function analysis which provides an AssumptionCache.

LLVM Basic Block Representation.

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI const BasicBlock * getSinglePredecessor() const

Return the predecessor of this block if it has a single predecessor block.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

void setCondition(Value *V)

LLVM_ABI void swapSuccessors()

Swap the successors of this branch instruction.

BasicBlock * getSuccessor(unsigned i) const

Value * getCondition() const

Analysis pass which computes BranchProbabilityInfo.

Predicate getPredicate() const

Return the predicate for this instruction.

Analysis pass which computes a DominatorTree.

FunctionPass class - This class is used to implement most global optimizations.

AttributeList getAttributes() const

Return the attribute list for this Function.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Definition HardwareLoops.cpp:259

This instruction compares its operands according to the predicate given to the constructor.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

This is an important class for using LLVM in a threaded context.

Analysis pass that exposes the LoopInfo for a function.

Represents a single loop in the control flow graph.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserve()

Mark an analysis as preserved.

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

Analysis pass that exposes the ScalarEvolution for a function.

LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)

Test whether entry to the loop is protected by a conditional between LHS and RHS.

const SCEV * getZero(Type *Ty)

Return a SCEV for the constant 0 of a specific type.

const SCEV * getOne(Type *Ty)

Return a SCEV for the constant 1 of a specific type.

LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)

LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical add expression, or something simpler if possible.

StringRef - Represent a constant reference to a string, i.e.

Analysis pass providing the TargetTransformInfo.

Analysis pass providing the TargetLibraryInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const

Query the target whether it would be profitable to convert the given loop into a hardware loop.

bool isPointerTy() const

True if this is an instance of PointerType.

void setOperand(unsigned i, Value *Val)

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

int getNumOccurrences() const

const ParentTy * getParent() const

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ BasicBlock

Various leaf nodes.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

friend class Instruction

Iterator for Instructions in a `BasicBlock`.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...

LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)

Examine each PHI in the given block and delete it if it is dead.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

FunctionAddr VTableAddr Count

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI void initializeHardwareLoopsLegacyPass(PassRegistry &)

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

LLVM_ABI FunctionPass * createHardwareLoopsLegacyPass()

Create Hardware Loop pass.

Definition HardwareLoops.cpp:598

LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)

std::optional< bool > Force

HardwareLoopOptions & setForceNested(bool Force)

std::optional< bool > ForceGuard

std::optional< unsigned > Decrement

HardwareLoopOptions & setDecrement(unsigned Count)

HardwareLoopOptions & setForceGuard(bool Force)

HardwareLoopOptions & setForce(bool Force)

HardwareLoopOptions & setCounterBitwidth(unsigned Width)

std::optional< unsigned > Bitwidth

HardwareLoopOptions & setForcePhi(bool Force)

std::optional< bool > ForcePhi

std::optional< bool > ForceNested

bool getForceNested() const