LLVM: lib/CodeGen/HardwareLoops.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

44

// Debug category name for this file.
45#define DEBUG_TYPE "hardware-loops"

46

// Human-readable pass name.
47#define HW_LOOPS_NAME "Hardware Loop Insertion"

48

49using namespace llvm;

50

// The lines below are the description arguments of the pass's command-line
// options; the heads of those option declarations are not present in this
// listing.
53 cl::desc("Force hardware loops intrinsics to be inserted"));

54

58 cl::desc("Force hardware loop counter to be updated through a phi"));

59

62 cl::desc("Force allowance of nested hardware loops"));

63

66 cl::desc("Set the loop decrement value"));

67

70 cl::desc("Set the loop counter bitwidth"));

71

75 cl::desc("Force generation of loop guard intrinsic"));

76

// Counter bumped once for each loop handed to HardwareLoop::Create() by
// HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &).
77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");

78

// Debug-only helper: compiled only when NDEBUG is not defined. The function
// header (listing lines 80-81) is not present in this listing; the body uses
// a message `DebugMsg` and an optional instruction `I`.
79#ifndef NDEBUG

// Always start with the "HWLoops: " prefix followed by the message.
82 dbgs() << "HWLoops: " << DebugMsg;

// With an instruction, print it after a space; without one, terminate the
// message with a full stop.
83 if (I)

84 dbgs() << ' ' << *I;

85 else

86 dbgs() << '.';

87 dbgs() << '\n';

88}

89#endif

90

// Body of the remark-construction helper; its header (listing lines 91-92)
// and the declarations of `DL` and `R` (lines 94 and 104) are not present in
// this listing.
// By default the remark is attached to the loop header.
93 BasicBlock *CodeRegion = L->getHeader();

95

// When a specific instruction is supplied, attach the remark to its block
// instead, and take over its debug location only if it actually has one.
96 if (I) {

97 CodeRegion = I->getParent();

98

99

100 if (I->getDebugLoc())

101 DL = I->getDebugLoc();

102 }

103

// Every remark produced here starts with the same fixed prefix; the remark
// object is returned to the caller.
105 R << "hardware-loop not created: ";

106 return R;

107}

108

109namespace {

110

// Reports that a loop was not converted. Callers in this file pass a message,
// a remark tag, the remark emitter and the loop. The remaining parameters and
// the body (listing lines 112-114) are not present in this listing.
111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,

115 }

116

118

// Legacy pass-manager wrapper: a FunctionPass whose runOnFunction() gathers
// the analyses and delegates to HardwareLoopsImpl.
119 class HardwareLoopsLegacy : public FunctionPass {

120 public:

// Pass identifier; defined after the anonymous namespace.
121 static char ID;

122

// The constructor body (listing line 124) is not present in this listing.
123 HardwareLoopsLegacy() : FunctionPass(ID) {

125 }

126

128

// Declares the analyses this pass requires and preserves. The template
// arguments naming each analysis were lost in this listing, and listing
// lines 130, 131 and 133 are missing.
129 void getAnalysisUsage(AnalysisUsage &AU) const override {

132 AU.addRequired();

134 AU.addRequired();

135 AU.addPreserved();

136 AU.addRequired();

137 AU.addRequired();

138 AU.addRequired();

139 AU.addPreserved();

140 }

141 };

142

// Pass-manager-independent driver. Both the legacy pass and the new-PM run()
// in this file construct one of these with the analyses they obtained and
// then call run().
143 class HardwareLoopsImpl {

144 public:

// Stores the supplied analyses and options; nothing is computed here.
145 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,

146 DominatorTree &DT, const DataLayout &DL,

147 const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,

148 AssumptionCache &AC, OptimizationRemarkEmitter *ORE,

149 HardwareLoopOptions &Opts)

150 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),

151 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }

152

// Processes the loops of F and returns MadeChange.
153 bool run(Function &F);

154

155 private:

156

// Recursive attempt on L: sub-loops are visited before L itself.
157 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);

158

159

160

// Attempt on an already analysed loop; returns true after a HardwareLoop
// has been built for it and Create() has been invoked.
161 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);

162

163 ScalarEvolution &SE;

164 LoopInfo &LI;

// Forwarded to InsertPreheaderForLoop when a preheader has to be created.
165 bool PreserveLCSSA;

166 DominatorTree &DT;

167 const DataLayout &DL;

168 const TargetTransformInfo &TTI;

// May be null: the legacy pass passes nullptr when the analysis is not
// available.
169 TargetLibraryInfo *TLI = nullptr;

170 AssumptionCache &AC;

171 OptimizationRemarkEmitter *ORE;

172 HardwareLoopOptions &Opts;

// Accumulated across all loops; this is the value run() returns.
173 bool MadeChange = false;

174 };

175

// Performs the IR rewrite for a single loop described by a HardwareLoopInfo.
// Listing lines 187, 219 and 226 (one helper declaration and two data
// members) are not present in this listing.
176 class HardwareLoop {

177

// Expands the loop count; returns nullptr when the expansion is not safe.
178 Value *InitLoopCount();

179

180

// Inserts the *_loop_iterations intrinsic call that takes LoopCountInit.
181 Value *InsertIterationSetup(Value *LoopCountInit);

182

183

// Inserts a call to the loop_decrement intrinsic.
184 void InsertLoopDec();

185

186

188

189

190

191

// Creates a two-input PHI in the loop header: NumElts arrives from the
// preheader, EltsRem from the latch.
192 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);

193

194

195

// Builds an "EltsRem != 0" comparison; the remaining statements of the
// definition are not present in this listing.
196 void UpdateBranch(Value *EltsRem);

197

198 public:

// Copies the loop, exit count, counter type, exit branch, decrement and the
// two strategy flags out of Info.
199 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,

200 const DataLayout &DL,

201 OptimizationRemarkEmitter *ORE,

202 HardwareLoopOptions &Opts) :

203 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),

204 ExitCount(Info.ExitCount),

205 CountType(Info.CountType),

206 ExitBranch(Info.ExitBranch),

207 LoopDecrement(Info.LoopDecrement),

208 UsePHICounter(Info.CounterInReg),

209 UseLoopGuard(Info.PerformEntryTest) { }

210

// Runs the conversion; returns nothing, so a failure is only visible through
// the remark it reports.
211 void Create();

212

213 private:

214 ScalarEvolution &SE;

215 const DataLayout &DL;

216 OptimizationRemarkEmitter *ORE = nullptr;

217 HardwareLoopOptions &Opts;

218 Loop *L = nullptr;

220 const SCEV *ExitCount = nullptr;

221 Type *CountType = nullptr;

222 BranchInst *ExitBranch = nullptr;

223 Value *LoopDecrement = nullptr;

// Initialised from Info.CounterInReg.
224 bool UsePHICounter = false;

// Initialised from Info.PerformEntryTest; InitLoopCount() may change it.
225 bool UseLoopGuard = false;

227 };

228}

229

// Definition of the static pass identifier that the HardwareLoopsLegacy
// constructor hands to FunctionPass.
230char HardwareLoopsLegacy::ID = 0;

231

// Legacy-PM entry point: collects the analyses, builds the options and
// delegates to HardwareLoopsImpl::run(). Returns false without doing anything
// when skipFunction(F) is true.
232bool HardwareLoopsLegacy::runOnFunction(Function &F) {

233 if (skipFunction(F))

234 return false;

235

236 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");

237

// The template arguments naming each analysis were lost in this listing.
238 auto &LI = getAnalysis().getLoopInfo();

239 auto &SE = getAnalysis().getSE();

240 auto &DT = getAnalysis().getDomTree();

241 auto &TTI = getAnalysis().getTTI(F);

242 auto &DL = F.getDataLayout();

243 auto *ORE = &getAnalysis().getORE();

// This analysis is optional: TLI ends up null when it is not available.
244 auto *TLIP = getAnalysisIfAvailable();

245 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;

246 auto &AC = getAnalysis().getAssumptionCache(F);

247 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

248

// Listing lines 250-261, which follow this declaration, are not present in
// this listing.
249 HardwareLoopOptions Opts;

262

263 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,

264 Opts);

265 return Impl.run(F);

266}

267

// Tail of the new pass-manager run(Function &, FunctionAnalysisManager &);
// its header and most of its body (listing lines 268-276 and 280-288) are not
// present in this listing.
277 auto &DL = F.getDataLayout();

278

// Unlike the legacy pass, PreserveLCSSA is passed as a constant true here.
279 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);

283

289 return PA;

290}

291

// Walks the top-level loops of LI and tries to convert each outermost loop.
// The per-loop return value is ignored; the result is the accumulated
// MadeChange member. The declaration of `Ctx` (listing line 293) is not
// present in this listing.
292bool HardwareLoopsImpl::run(Function &F) {

294 for (Loop *L : LI)

295 if (L->isOutermost())

296 TryConvertLoop(L, Ctx);

297 return MadeChange;

298}

299

300

301

// Recursive conversion attempt for L and its sub-loops. The return value
// tells the caller, which is processing the parent loop, whether to stop: a
// true result makes the parent report "nested hardware-loops not supported"
// and return true itself.
302bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {

303

// Visit every sub-loop first; `|=` keeps visiting the remaining sub-loops
// even after one of them has returned true.
304 bool AnyChanged = false;

305 for (Loop *SL : *L)

306 AnyChanged |= TryConvertLoop(SL, Ctx);

307 if (AnyChanged) {

308 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",

309 ORE, L);

310 return true;

311 }

312

313 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");

314

315 HardwareLoopInfo HWLoopInfo(L);

316 if (!HWLoopInfo.canAnalyze(LI)) {

317 reportHWLoopFailure("cannot analyze loop, irreducible control flow",

318 "HWLoopCannotAnalyze", ORE, L);

319 return false;

320 }

321

// The second half of this condition (listing line 323) is not present in
// this listing; the failure message shows it is a profitability check that
// Opts.Force bypasses.
322 if (!Opts.Force &&

324 reportHWLoopFailure("it's not profitable to create a hardware-loop",

325 "HWLoopNotProfitable", ORE, L);

326 return false;

327 }

328

329

// Option override; the statement inside this `if` (listing line 331) is not
// present in this listing.
330 if (Opts.Bitwidth.has_value()) {

332 }

333

// The guard for this assignment (listing line 334) is not present in this
// listing. The decrement constant is created in HWLoopInfo.CountType.
335 HWLoopInfo.LoopDecrement =

336 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());

337

// MadeChange is a member, so it also reflects conversions made earlier in
// this run. The caller is told to stop only when nesting is not legal for
// this loop and nesting has not been forced.
338 MadeChange |= TryConvertLoop(HWLoopInfo);

339 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);

340}

341

// Converts the loop described by HWLoopInfo. Returns false when the loop is
// rejected as a candidate or no preheader is available; otherwise builds a
// HardwareLoop, calls Create(), bumps NumHWLoops and returns true.
342bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {

343

344 Loop *L = HWLoopInfo.L;

345 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);

346

349

350

351

// The condition guarding this rejection (listing lines 347-348) is not
// present in this listing.
352 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);

353 return false;

354 }

355

// Message of an assertion whose condition (listing lines 356-357) is not
// present in this listing.
358 "Hardware Loop must have set exit info.");

359

360 BasicBlock *Preheader = L->getLoopPreheader();

361

362

// The statement executed when there is no preheader (listing line 364) is
// not present in this listing; Preheader is re-checked afterwards and the
// conversion is abandoned if it is still null.
363 if (!Preheader)

365 if (!Preheader)

366 return false;

367

// NOTE(review): Create() returns void, so the counter is incremented and true
// is returned even if Create() bails out early.
368 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);

369 HWLoop.Create();

370 ++NumHWLoops;

371 return true;

372}

373

// Rewrites the loop: computes the initial count, inserts the setup intrinsic,
// then inserts either the register/PHI form or the plain form of the
// decrement.
374void HardwareLoop::Create() {

375 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");

376

// A null count means the expression could not be expanded safely; report it
// and leave the loop untouched.
377 Value *LoopCountInit = InitLoopCount();

378 if (!LoopCountInit) {

379 reportHWLoopFailure("could not safely create a loop count expression",

380 "HWLoopNotSafe", ORE, L);

381 return;

382 }

383

384 Value *Setup = InsertIterationSetup(LoopCountInit);

385

// PHI-based counter: create the decrement, then the header PHI fed by Setup
// and the decrement, then rewrite the branch on the decrement. Listing line
// 389, between the PHI creation and UpdateBranch, is not present in this
// listing.
386 if (UsePHICounter || Opts.ForcePhi) {

387 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);

388 Value *EltsRem = InsertPHICounter(Setup, LoopDec);

390 UpdateBranch(LoopDec);

391 } else

392 InsertLoopDec();

393

394

395

// The body of this loop over the loop's blocks (listing line 397) is not
// present in this listing.
396 for (auto *I : L->blocks())

398}

399

// Body of the static helper CanGenerateTest(Loop *L, Value *Count). Its header
// (listing line 400) and several statements are not present in this listing;
// every listing line is kept as-is apart from the corrected guard on line 410.
// Returns true only if every check below passes.
401 BasicBlock *Preheader = L->getLoopPreheader();

403 return false;

404

407 return false;

408

// Only a conditional branch whose condition is an integer compare can be
// analysed: the code that follows dereferences the condition as `ICmp`, so
// anything that is NOT an ICmpInst must be rejected here. (The listing had
// the isa<> test without the negation, which rejected exactly the branches
// the rest of the function is written to handle.)
410 if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))

411 return false;

412

413

414

416 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");

// Only equality comparisons are accepted.
417 if (!ICmp->isEquality())

418 return false;

419

// End of the IsCompareZero lambda; its beginning (listing lines 420-422) is
// not present in this listing.
423 return false;

424 };

425

426

427 Value *CountBefZext =

429

// The compare has to satisfy IsCompareZero for either Count or CountBefZext,
// with operand index 0 or 1.
430 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&

431 !IsCompareZero(ICmp, CountBefZext, 0) &&

432 !IsCompareZero(ICmp, CountBefZext, 1))

433 return false;

434

// The branch successor selected by SuccIdx (computed on listing line 435,
// not present in this listing) must be the loop preheader.
436 if (BI->getSuccessor(SuccIdx) != Preheader)

437 return false;

438

439 return true;

440}

441

// Expands ExitCount into IR and decides where the setup intrinsic will go
// (BeginBB). Returns nullptr when the expression cannot be expanded safely.
// Many statements of this function are not present in this listing; the
// comments below describe only the visible lines.
442Value *HardwareLoop::InitLoopCount() {

443 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");

444

445

446

// Values created by the expander use the name prefix "loopcnt".
447 SCEVExpander SCEVE(SE, DL, "loopcnt");

// Second half of a condition comparing the exit count's type with CountType;
// the first half and the guarded statement are not present in this listing.
449 ExitCount->getType() != CountType)

451

453

454

455

456

457

// UseLoopGuard is recomputed here; the condition selecting between the two
// branches (listing lines 458-461) is not present in this listing.
460 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");

462 UseLoopGuard = true;

463 } else

464 UseLoopGuard = false;

465

470

471

// If the count cannot be expanded at the predecessor's terminator, give up on
// the loop guard; otherwise expand in the predecessor.
472 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))

473 UseLoopGuard = false;

474 else

475 BB = Predecessor;

476 }

477

// Final safety check at the chosen block; failure aborts with nullptr.
478 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {

479 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "

480 << *ExitCount << "\n");

481 return nullptr;

482 }

483

// The expansion is requested in CountType; the insertion-point argument
// (listing line 485) is not present in this listing.
484 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,

486

487

488

489

490

491

492

493

// The setup intrinsic goes into BB when a loop guard is used, otherwise into
// the loop preheader.
495 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();

497 << " - Expanded Count in " << BB->getName() << "\n"

498 << " - Will insert set counter intrinsic into: "

499 << BeginBB->getName() << "\n");

501}

502

// Inserts the intrinsic call that sets up the iteration count and, when a
// loop guard is used, rewires the guarding branch to the intrinsic's result.
// Returns LoopCountInit when no PHI counter is used, otherwise the value
// produced by the intrinsic. Several statements (builder construction, `Ty`,
// `ID`, `LoopGuard`) are not present in this listing.
503Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {

506 Builder.setIsFPConstrained(true);

508 bool UsePhi = UsePHICounter || Opts.ForcePhi;

// Intrinsic selection; the first operand of the outer conditional (listing
// line 509) is not present in this listing:
//   outer true,  UsePhi  -> test_start_loop_iterations
//   outer true,  !UsePhi -> test_set_loop_iterations
//   outer false, UsePhi  -> start_loop_iterations
//   outer false, !UsePhi -> set_loop_iterations
510 ? (UsePhi ? Intrinsic::test_start_loop_iterations

511 : Intrinsic::test_set_loop_iterations)

512 : (UsePhi ? Intrinsic::start_loop_iterations

513 : Intrinsic::set_loop_iterations);

514 Value *LoopSetup = Builder.CreateIntrinsic(ID, Ty, LoopCountInit);

515

516

517 if (UseLoopGuard) {

520 "Expected conditional branch");

521

// With a PHI counter the branch condition is element 1 of the intrinsic's
// result; without one the result itself is the condition.
522 Value *SetCount =

523 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;

// Make successor 0 of the guard branch the loop preheader.
525 LoopGuard->setCondition(SetCount);

526 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())

527 LoopGuard->swapSuccessors();

528 }

529 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup

530 << "\n");

// With both a PHI counter and a guard, element 0 of the result is the value
// handed back to the caller.
531 if (UsePhi && UseLoopGuard)

532 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);

533 return !UsePhi ? LoopCountInit : LoopSetup;

534}

535

// Inserts a call to the loop_decrement intrinsic; the result is named NewCond.
// The builder construction, the intrinsic's arguments and the statements that
// consume NewCond are not present in this listing.
536void HardwareLoop::InsertLoopDec() {

// Use constrained FP mode when the function containing the exit branch has
// the StrictFP attribute.
538 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(

539 Attribute::StrictFP))

540 CondBuilder.setIsFPConstrained(true);

541

543 Value *NewCond = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement,

547

548

551

552

553

555

556 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");

557}

558

// Body of the helper that inserts a call to the loop_decrement_reg intrinsic
// (HardwareLoop::Create() calls it as InsertLoopRegDec). The header, the
// builder construction, the intrinsic's arguments and the return statement
// are not present in this listing.
// Use constrained FP mode when the function containing the exit branch has
// the StrictFP attribute.
561 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(

562 Attribute::StrictFP))

563 CondBuilder.setIsFPConstrained(true);

564

566 Value *Call = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement_reg,

568

569 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");

571}

572

// Creates the counter PHI at the first non-PHI position of the loop header.
// The PHI has the type of NumElts and two incoming values: NumElts from the
// preheader and EltsRem from the latch. The declarations of `Header` and
// `Latch` and the return statement are not present in this listing.
573PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {

574 BasicBlock *Preheader = L->getLoopPreheader();

577 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());

578 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);

579 Index->addIncoming(NumElts, Preheader);

580 Index->addIncoming(EltsRem, Latch);

581 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");

583}

584

// Builds an "EltsRem != 0" comparison, with the zero constant created in
// EltsRem's own type. The builder construction, the variable receiving the
// comparison and the statements that use it are not present in this listing.
585void HardwareLoop::UpdateBranch(Value *EltsRem) {

588 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));

591

592

595

596

597

599}

600

607

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Analysis containing CSE Info

This file contains the declarations for the subclasses of Constant, which represent the different fla...

static bool runOnFunction(Function &F, bool PostInlining)

static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))

#define HW_LOOPS_NAME

Definition HardwareLoops.cpp:47

static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))

static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)

Definition HardwareLoops.cpp:92

static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))

static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)

Definition HardwareLoops.cpp:80

static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))

static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))

static bool CanGenerateTest(Loop *L, Value *Count)

Definition HardwareLoops.cpp:400

static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))

Defines an IR pass for the creation of hardware loops.

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

Machine Check Debug Module

MachineInstr unsigned OpIdx

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This pass exposes codegen information to IR-level passes.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

A function analysis which provides an AssumptionCache.

LLVM Basic Block Representation.

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI const BasicBlock * getSinglePredecessor() const

Return the predecessor of this block if it has a single predecessor block.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

void setCondition(Value *V)

LLVM_ABI void swapSuccessors()

Swap the successors of this branch instruction.

BasicBlock * getSuccessor(unsigned i) const

Value * getCondition() const

Analysis pass which computes BranchProbabilityInfo.

Predicate getPredicate() const

Return the predicate for this instruction.

Analysis pass which computes a DominatorTree.

FunctionPass class - This class is used to implement most global optimizations.

AttributeList getAttributes() const

Return the attribute list for this Function.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Definition HardwareLoops.cpp:268

This instruction compares its operands according to the predicate given to the constructor.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

This is an important class for using LLVM in a threaded context.

Analysis pass that exposes the LoopInfo for a function.

Represents a single loop in the control flow graph.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserve()

Mark an analysis as preserved.

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

Analysis pass that exposes the ScalarEvolution for a function.

LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)

Test whether entry to the loop is protected by a conditional between LHS and RHS.

const SCEV * getZero(Type *Ty)

Return a SCEV for the constant 0 of a specific type.

const SCEV * getOne(Type *Ty)

Return a SCEV for the constant 1 of a specific type.

LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)

LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical add expression, or something simpler if possible.

StringRef - Represent a constant reference to a string, i.e.

Analysis pass providing the TargetTransformInfo.

Analysis pass providing the TargetLibraryInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const

Query the target whether it would be profitable to convert the given loop into a hardware loop.

bool isPointerTy() const

True if this is an instance of PointerType.

void setOperand(unsigned i, Value *Val)

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

int getNumOccurrences() const

const ParentTy * getParent() const

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ BasicBlock

Various leaf nodes.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

friend class Instruction

Iterator for Instructions in a `BasicBlock`.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...

LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)

Examine each PHI in the given block and delete it if it is dead.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

FunctionAddr VTableAddr Count

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI void initializeHardwareLoopsLegacyPass(PassRegistry &)

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

LLVM_ABI FunctionPass * createHardwareLoopsLegacyPass()

Create Hardware Loop pass.

Definition HardwareLoops.cpp:608

LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)

std::optional< bool > Force

HardwareLoopOptions & setForceNested(bool Force)

std::optional< bool > ForceGuard

std::optional< unsigned > Decrement

HardwareLoopOptions & setDecrement(unsigned Count)

HardwareLoopOptions & setForceGuard(bool Force)

HardwareLoopOptions & setForce(bool Force)

HardwareLoopOptions & setCounterBitwidth(unsigned Width)

std::optional< unsigned > Bitwidth

HardwareLoopOptions & setForcePhi(bool Force)

std::optional< bool > ForcePhi

std::optional< bool > ForceNested

bool getForceNested() const