LLVM: lib/Transforms/Scalar/LoopDataPrefetch.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
15
32
33#define DEBUG_TYPE "loop-data-prefetch"
34
35using namespace llvm;
36
37
38
41 cl::desc("Prefetch write addresses"));
42
45 cl::desc("Number of instructions to prefetch ahead"),
47
51
53 "max-prefetch-iters-ahead",
55
56STATISTIC(NumPrefetches, "Number of prefetches inserted");
57
58namespace {
59
60
61class LoopDataPrefetch {
62public:
66 : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
67
68 bool run();
69
70private:
71 bool runOnLoop(Loop *L);
72
73
74
75 bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
76
77 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
78 unsigned NumStridedMemAccesses,
79 unsigned NumPrefetches,
80 bool HasCall) {
83 return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
84 NumPrefetches, HasCall);
85 }
86
87 unsigned getPrefetchDistance() {
90 return TTI->getPrefetchDistance();
91 }
92
93 unsigned getMaxPrefetchIterationsAhead() {
96 return TTI->getMaxPrefetchIterationsAhead();
97 }
98
99 bool doPrefetchWrites() {
102 return TTI->enableWritePrefetching();
103 }
104
105 AssumptionCache *AC;
106 DominatorTree *DT;
107 LoopInfo *LI;
108 ScalarEvolution *SE;
109 const TargetTransformInfo *TTI;
110 OptimizationRemarkEmitter *ORE;
111};
112
113
114class LoopDataPrefetchLegacyPass : public FunctionPass {
115public:
116 static char ID;
117 LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
119 }
120
121 void getAnalysisUsage(AnalysisUsage &AU) const override {
122 AU.addRequired();
123 AU.addRequired();
129 AU.addRequired();
130 AU.addRequired();
131 AU.addPreserved();
132 AU.addRequired();
133 }
134
136 };
137}
138
139char LoopDataPrefetchLegacyPass::ID = 0;
141 "Loop Data Prefetch", false, false)
150
152 return new LoopDataPrefetchLegacyPass();
153}
154
155bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
156 unsigned TargetMinStride) {
157
158 if (TargetMinStride <= 1)
159 return true;
160
162
163
164 if (!ConstStride)
165 return false;
166
167 unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
168 return TargetMinStride <= AbsStride;
169}
170
180
181 LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
182 bool Changed = LDP.run();
183
188 return PA;
189 }
190
192}
193
194bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
195 if (skipFunction(F))
196 return false;
197
198 DominatorTree *DT = &getAnalysis().getDomTree();
199 LoopInfo *LI = &getAnalysis().getLoopInfo();
200 ScalarEvolution *SE = &getAnalysis().getSE();
202 &getAnalysis().getAssumptionCache(F);
204 &getAnalysis().getORE();
206 &getAnalysis().getTTI(F);
207
208 LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
209 return LDP.run();
210}
211
212bool LoopDataPrefetch::run() {
213
214
215
216 if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
217 LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
218 "for loop data prefetch.\n");
219 return false;
220 }
221
222 bool MadeChange = false;
223
224 for (Loop *I : *LI)
226 MadeChange |= runOnLoop(L);
227
228 return MadeChange;
229}
230
231
232
234
236
238
240
242
243
247
248
249
250
251
253 int64_t PtrDiff = 0) {
258 } else {
261 if (PrefBB != InsBB) {
263 if (DomBB != PrefBB)
265 }
266
269 }
270 }
271};
272
273bool LoopDataPrefetch::runOnLoop(Loop *L) {
274 bool MadeChange = false;
275
276
277 if (->isInnermost())
278 return MadeChange;
279
280 SmallPtrSet<const Value *, 32> EphValues;
282
283
285 bool HasCall = false;
286 for (const auto BB : L->blocks()) {
287
288
289 for (auto &I : *BB) {
292 if (F->getIntrinsicID() == Intrinsic::prefetch)
293 return MadeChange;
295 HasCall = true;
296 } else {
297 HasCall = true;
298 }
299 }
300 }
301 Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
302 }
303
304 if (.NumInsts.isValid())
305 return MadeChange;
306
307 unsigned LoopSize = Metrics.NumInsts.getValue();
308 if (!LoopSize)
309 LoopSize = 1;
310
311 unsigned ItersAhead = getPrefetchDistance() / LoopSize;
312 if (!ItersAhead)
313 ItersAhead = 1;
314
315 if (ItersAhead > getMaxPrefetchIterationsAhead())
316 return MadeChange;
317
319 if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
320 return MadeChange;
321
322 unsigned NumMemAccesses = 0;
323 unsigned NumStridedMemAccesses = 0;
325 for (const auto BB : L->blocks())
326 for (auto &I : *BB) {
329
331 MemI = LMemI;
332 PtrValue = LMemI->getPointerOperand();
334 if (!doPrefetchWrites()) continue;
335 MemI = SMemI;
336 PtrValue = SMemI->getPointerOperand();
337 } else continue;
338
341 continue;
342 NumMemAccesses++;
343 if (L->isLoopInvariant(PtrValue))
344 continue;
345
346 const SCEV *LSCEV = SE->getSCEV(PtrValue);
348 if (!LSCEVAddRec)
349 continue;
350 NumStridedMemAccesses++;
351
352
353
354
355 bool DupPref = false;
356 for (auto &Pref : Prefetches) {
357 const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
358 if (const SCEVConstant *ConstPtrDiff =
360 int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
362 Pref.addInstruction(MemI, DT, PD);
363 DupPref = true;
364 break;
365 }
366 }
367 }
368 if (!DupPref)
369 Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
370 }
371
372 unsigned TargetMinStride =
373 getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
374 Prefetches.size(), HasCall);
375
377 << " iterations ahead (loop size: " << LoopSize << ") in "
378 << L->getHeader()->getParent()->getName() << ": " << *L);
380 << NumMemAccesses << " memory accesses, "
381 << NumStridedMemAccesses << " strided memory accesses, "
382 << Prefetches.size() << " potential prefetch(es), "
383 << "a minimum stride of " << TargetMinStride << ", "
384 << (HasCall ? "calls" : "no calls") << ".\n");
385
386 for (auto &P : Prefetches) {
387
388
389 if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
390 continue;
391
392 BasicBlock *BB = P.InsertPt->getParent();
393 SCEVExpander SCEVE(*SE, BB->getDataLayout(), "prefaddr");
395 SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
396 P.LSCEVAddRec->getStepRecurrence(*SE)));
397 if (!SCEVE.isSafeToExpand(NextLSCEV))
398 continue;
399
401 Type *I8Ptr = PointerType::get(BB->getContext(), PtrAddrSpace);
402 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
403
406 Builder.CreateIntrinsic(Intrinsic::prefetch, PrefPtrValue->getType(),
407 {PrefPtrValue, ConstantInt::get(I32, P.Writes),
408 ConstantInt::get(I32, 3),
409 ConstantInt::get(I32, 1)});
410 ++NumPrefetches;
413 << ", SCEV: " << *P.LSCEVAddRec << "\n");
414 ORE->emit([&]() {
415 return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
416 << "prefetched memory access";
417 });
418
419 MadeChange = true;
420 }
421
422 return MadeChange;
423}
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
loop data Loop Data Prefetch
Definition LoopDataPrefetch.cpp:149
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This file provides the interface for LLVM's Loop Data Prefetching Pass.
static const Function * getCalledFunction(const Value *V)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
FunctionPass class - This class is used to implement most global optimizations.
Analysis pass that exposes the LoopInfo for a function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition LoopDataPrefetch.cpp:171
The legacy pass manager's analysis pass to compute loop information.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool shouldPrefetchAddressSpace(unsigned AS) const
LLVM_ABI unsigned getCacheLineSize() const
LLVM_ABI bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getType() const
All values are typed, get the type of this value.
int getNumOccurrences() const
@ BasicBlock
Various leaf nodes.
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI char & LoopSimplifyID
LLVM_ABI FunctionPass * createLoopDataPrefetchPass()
Definition LoopDataPrefetch.cpp:151
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
iterator_range< df_iterator< T > > depth_first(const T &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Instruction * InsertPt
The point of insertion for the prefetch instruction.
Definition LoopDataPrefetch.cpp:237
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
Definition LoopDataPrefetch.cpp:252
bool Writes
True if targeting a write memory access.
Definition LoopDataPrefetch.cpp:239
Instruction * MemI
The (first seen) prefetched instruction.
Definition LoopDataPrefetch.cpp:241
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Definition LoopDataPrefetch.cpp:235
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Definition LoopDataPrefetch.cpp:244
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).