LLVM: lib/Transforms/Scalar/LoopDataPrefetch.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

15

32

33#define DEBUG_TYPE "loop-data-prefetch"

34

35using namespace llvm;

36

37

38

41 cl::desc("Prefetch write addresses"));

42

45 cl::desc("Number of instructions to prefetch ahead"),

47

51

53 "max-prefetch-iters-ahead",

55

56STATISTIC(NumPrefetches, "Number of prefetches inserted");

57

58namespace {

59

60

61class LoopDataPrefetch {

62public:

66 : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}

67

68 bool run();

69

70private:

71 bool runOnLoop(Loop *L);

72

73

74

75 bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);

76

77 unsigned getMinPrefetchStride(unsigned NumMemAccesses,

78 unsigned NumStridedMemAccesses,

79 unsigned NumPrefetches,

80 bool HasCall) {

83 return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,

84 NumPrefetches, HasCall);

85 }

86

87 unsigned getPrefetchDistance() {

90 return TTI->getPrefetchDistance();

91 }

92

93 unsigned getMaxPrefetchIterationsAhead() {

96 return TTI->getMaxPrefetchIterationsAhead();

97 }

98

99 bool doPrefetchWrites() {

102 return TTI->enableWritePrefetching();

103 }

104

105 AssumptionCache *AC;

106 DominatorTree *DT;

107 LoopInfo *LI;

108 ScalarEvolution *SE;

109 const TargetTransformInfo *TTI;

110 OptimizationRemarkEmitter *ORE;

111};

112

113

114class LoopDataPrefetchLegacyPass : public FunctionPass {

115public:

116 static char ID;

117 LoopDataPrefetchLegacyPass() : FunctionPass(ID) {

119 }

120

121 void getAnalysisUsage(AnalysisUsage &AU) const override {

122 AU.addRequired();

123 AU.addRequired();

129 AU.addRequired();

130 AU.addRequired();

131 AU.addPreserved();

132 AU.addRequired();

133 }

134

136 };

137}

138

139char LoopDataPrefetchLegacyPass::ID = 0;

141 "Loop Data Prefetch", false, false)

150

152 return new LoopDataPrefetchLegacyPass();

153}

154

155bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,

156 unsigned TargetMinStride) {

157

158 if (TargetMinStride <= 1)

159 return true;

160

162

163

164 if (!ConstStride)

165 return false;

166

167 unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());

168 return TargetMinStride <= AbsStride;

169}

170

180

181 LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);

182 bool Changed = LDP.run();

183

188 return PA;

189 }

190

192}

193

194bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {

195 if (skipFunction(F))

196 return false;

197

198 DominatorTree *DT = &getAnalysis().getDomTree();

199 LoopInfo *LI = &getAnalysis().getLoopInfo();

200 ScalarEvolution *SE = &getAnalysis().getSE();

202 &getAnalysis().getAssumptionCache(F);

204 &getAnalysis().getORE();

206 &getAnalysis().getTTI(F);

207

208 LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);

209 return LDP.run();

210}

211

212bool LoopDataPrefetch::run() {

213

214

215

216 if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {

217 LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "

218 "for loop data prefetch.\n");

219 return false;

220 }

221

222 bool MadeChange = false;

223

224 for (Loop *I : *LI)

226 MadeChange |= runOnLoop(L);

227

228 return MadeChange;

229}

230

231

232

234

236

238

240

242

243

247

248

249

250

251

253 int64_t PtrDiff = 0) {

258 } else {

261 if (PrefBB != InsBB) {

263 if (DomBB != PrefBB)

265 }

266

269 }

270 }

271};

272

273bool LoopDataPrefetch::runOnLoop(Loop *L) {

274 bool MadeChange = false;

275

276

277 if (L->isInnermost())

278 return MadeChange;

279

280 SmallPtrSet<const Value *, 32> EphValues;

282

283

285 bool HasCall = false;

286 for (const auto BB : L->blocks()) {

287

288

289 for (auto &I : *BB) {

292 if (F->getIntrinsicID() == Intrinsic::prefetch)

293 return MadeChange;

295 HasCall = true;

296 } else {

297 HasCall = true;

298 }

299 }

300 }

301 Metrics.analyzeBasicBlock(BB, *TTI, EphValues);

302 }

303

304 if (Metrics.NumInsts.isValid())

305 return MadeChange;

306

307 unsigned LoopSize = Metrics.NumInsts.getValue();

308 if (!LoopSize)

309 LoopSize = 1;

310

311 unsigned ItersAhead = getPrefetchDistance() / LoopSize;

312 if (!ItersAhead)

313 ItersAhead = 1;

314

315 if (ItersAhead > getMaxPrefetchIterationsAhead())

316 return MadeChange;

317

319 if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)

320 return MadeChange;

321

322 unsigned NumMemAccesses = 0;

323 unsigned NumStridedMemAccesses = 0;

325 for (const auto BB : L->blocks())

326 for (auto &I : *BB) {

329

331 MemI = LMemI;

332 PtrValue = LMemI->getPointerOperand();

334 if (!doPrefetchWrites()) continue;

335 MemI = SMemI;

336 PtrValue = SMemI->getPointerOperand();

337 } else continue;

338

341 continue;

342 NumMemAccesses++;

343 if (L->isLoopInvariant(PtrValue))

344 continue;

345

346 const SCEV *LSCEV = SE->getSCEV(PtrValue);

348 if (!LSCEVAddRec)

349 continue;

350 NumStridedMemAccesses++;

351

352

353

354

355 bool DupPref = false;

356 for (auto &Pref : Prefetches) {

357 const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);

358 if (const SCEVConstant *ConstPtrDiff =

360 int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());

362 Pref.addInstruction(MemI, DT, PD);

363 DupPref = true;

364 break;

365 }

366 }

367 }

368 if (!DupPref)

369 Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));

370 }

371

372 unsigned TargetMinStride =

373 getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,

374 Prefetches.size(), HasCall);

375

377 << " iterations ahead (loop size: " << LoopSize << ") in "

378 << L->getHeader()->getParent()->getName() << ": " << *L);

380 << NumMemAccesses << " memory accesses, "

381 << NumStridedMemAccesses << " strided memory accesses, "

382 << Prefetches.size() << " potential prefetch(es), "

383 << "a minimum stride of " << TargetMinStride << ", "

384 << (HasCall ? "calls" : "no calls") << ".\n");

385

386 for (auto &P : Prefetches) {

387

388

389 if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))

390 continue;

391

392 BasicBlock *BB = P.InsertPt->getParent();

393 SCEVExpander SCEVE(*SE, BB->getDataLayout(), "prefaddr");

395 SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),

396 P.LSCEVAddRec->getStepRecurrence(*SE)));

397 if (!SCEVE.isSafeToExpand(NextLSCEV))

398 continue;

399

401 Type *I8Ptr = PointerType::get(BB->getContext(), PtrAddrSpace);

402 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);

403

406 Builder.CreateIntrinsic(Intrinsic::prefetch, PrefPtrValue->getType(),

407 {PrefPtrValue, ConstantInt::get(I32, P.Writes),

408 ConstantInt::get(I32, 3),

409 ConstantInt::get(I32, 1)});

410 ++NumPrefetches;

413 << ", SCEV: " << *P.LSCEVAddRec << "\n");

414 ORE->emit([&]() {

415 return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)

416 << "prefetched memory access";

417 });

418

419 MadeChange = true;

420 }

421

422 return MadeChange;

423}

This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.

static bool runOnFunction(Function &F, bool PostInlining)

static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))

static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)

loop data Loop Data Prefetch

Definition LoopDataPrefetch.cpp:149

static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)

static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)

This file provides the interface for LLVM's Loop Data Prefetching Pass.

static const Function * getCalledFunction(const Value *V)

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This pass exposes codegen information to IR-level passes.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)

AnalysisUsage & addPreservedID(const void *ID)

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

A function analysis which provides an AssumptionCache.

An immutable pass that tracks lazily created AssumptionCache objects.

A cache of @llvm.assume calls within a function.

LLVM Basic Block Representation.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this basic block belongs to.

LLVM_ABI LLVMContext & getContext() const

Get the context in which this basic block lives.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const

Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...

FunctionPass class - This class is used to implement most global optimizations.

Analysis pass that exposes the LoopInfo for a function.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Run the pass over the function.

Definition LoopDataPrefetch.cpp:171

The legacy pass manager's analysis pass to compute loop information.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserve()

Mark an analysis as preserved.

This node represents a polynomial recurrence on the trip count of the specified loop.

const SCEV * getStepRecurrence(ScalarEvolution &SE) const

Constructs and returns the recurrence indicating how much this expression steps by.

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

Analysis pass that exposes the ScalarEvolution for a function.

The main scalar evolution driver.

LLVM_ABI const SCEV * getConstant(ConstantInt *V)

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Returns the upper bound of the loop trip count as a normal unsigned value.

LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Return LHS-RHS.

LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical multiply expression, or something simpler if possible.

LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical add expression, or something simpler if possible.

Analysis pass providing the TargetTransformInfo.

Wrapper pass for TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

LLVM_ABI bool shouldPrefetchAddressSpace(unsigned AS) const

LLVM_ABI unsigned getCacheLineSize() const

LLVM_ABI bool isLoweredToCall(const Function *F) const

Test whether calls to a function lower to actual program function calls.

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

Type * getType() const

All values are typed, get the type of this value.

int getNumOccurrences() const

@ BasicBlock

Various leaf nodes.

@ PD

PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

friend class Instruction

Iterator for Instructions in a `BasicBlock.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI char & LoopSimplifyID

LLVM_ABI FunctionPass * createLoopDataPrefetchPass()

Definition LoopDataPrefetch.cpp:151

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)

iterator_range< df_iterator< T > > depth_first(const T &G)

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

Instruction * InsertPt

The point of insertion for the prefetch instruction.

Definition LoopDataPrefetch.cpp:237

void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)

Add the instruction.

Definition LoopDataPrefetch.cpp:252

bool Writes

True if targeting a write memory access.

Definition LoopDataPrefetch.cpp:239

Instruction * MemI

The (first seen) prefetched instruction.

Definition LoopDataPrefetch.cpp:241

const SCEVAddRecExpr * LSCEVAddRec

The address formula for this prefetch as returned by ScalarEvolution.

Definition LoopDataPrefetch.cpp:235

Prefetch(const SCEVAddRecExpr *L, Instruction *I)

Constructor to create a new Prefetch for I.

Definition LoopDataPrefetch.cpp:244

static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)

Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).