LLVM: lib/Transforms/Scalar/LoopDataPrefetch.cpp Source File
//===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a Loop Data Prefetching Pass.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/InitializePasses.h"

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#define DEBUG_TYPE "loop-data-prefetch"

using namespace llvm;

static cl::opt<bool>
    PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
                   cl::desc("Prefetch write addresses"));

static cl::opt<unsigned>
    PrefetchDistance("prefetch-distance",
                     cl::desc("Number of instructions to prefetch ahead"),
                     cl::Hidden);

static cl::opt<unsigned>
    MinPrefetchStride("min-prefetch-stride",
                      cl::desc("Min stride to add prefetches"), cl::Hidden);

static cl::opt<unsigned> MaxPrefetchIterationsAhead(
    "max-prefetch-iters-ahead",
    cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);

STATISTIC(NumPrefetches, "Number of prefetches inserted");

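// When one of the options above is passed explicitly on the command line, it
// overrides the corresponding TargetTransformInfo default (see the
// getNumOccurrences() checks in the helpers below).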
namespace {

/// Loop prefetch implementation class.
class LoopDataPrefetch {
public:
  LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
                   ScalarEvolution *SE, const TargetTransformInfo *TTI,
                   OptimizationRemarkEmitter *ORE)
      : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}

  bool run();

private:
  bool runOnLoop(Loop *L);

  /// Check if the stride of the accesses is large enough to warrant a
  /// prefetch.
  bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);

  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) {
    if (MinPrefetchStride.getNumOccurrences() > 0)
      return MinPrefetchStride;
    return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }

  unsigned getPrefetchDistance() {
    if (PrefetchDistance.getNumOccurrences() > 0)
      return PrefetchDistance;
    return TTI->getPrefetchDistance();
  }

  unsigned getMaxPrefetchIterationsAhead() {
    if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
      return MaxPrefetchIterationsAhead;
    return TTI->getMaxPrefetchIterationsAhead();
  }

  bool doPrefetchWrites() {
    if (PrefetchWrites.getNumOccurrences() > 0)
      return PrefetchWrites;
    return TTI->enableWritePrefetching();
  }

  AssumptionCache *AC;
  DominatorTree *DT;
  LoopInfo *LI;
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  OptimizationRemarkEmitter *ORE;
};

/// Legacy pass wrapper for inserting loop data prefetches.
class LoopDataPrefetchLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid
  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
    initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequiredID(LoopSimplifyID);
    AU.addPreservedID(LoopSimplifyID);
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;
};
} // end anonymous namespace

char LoopDataPrefetchLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
                      "Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
                    "Loop Data Prefetch", false, false)

FunctionPass *llvm::createLoopDataPrefetchPass() {
  return new LoopDataPrefetchLegacyPass();
}

bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
                                           unsigned TargetMinStride) {
  // No need to check if any stride goes.
  if (TargetMinStride <= 1)
    return true;

  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  // If MinStride is set, don't prefetch unless we can ensure that stride is
  // larger.
  if (!ConstStride)
    return false;

  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
  return TargetMinStride <= AbsStride;
}

PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
  ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
  AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
  OptimizationRemarkEmitter *ORE =
      &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);

  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
  bool Changed = LDP.run();

  if (Changed) {
    PreservedAnalyses PA;
    PA.preserve<DominatorTreeAnalysis>();
    PA.preserve<LoopAnalysis>();
    return PA;
  }

  return PreservedAnalyses::all();
}

bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  AssumptionCache *AC =
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  OptimizationRemarkEmitter *ORE =
      &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  const TargetTransformInfo *TTI =
      &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
  return LDP.run();
}

bool LoopDataPrefetch::run() {
  // If PrefetchDistance is not set, don't run the pass.  This gives an
  // opportunity for targets to run this pass for selected subtargets only
  // (whose TTI sets PrefetchDistance and CacheLineSize).
  if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
    LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
                         "for loop data prefetch.\n");
    return false;
  }

  bool MadeChange = false;

  for (Loop *I : *LI)
    for (Loop *L : depth_first(I))
      MadeChange |= runOnLoop(L);

  return MadeChange;
}

/// A record for a potential prefetch made during the initial scan of the
/// loop. This is used to let a single prefetch target multiple memory
/// accesses.
struct Prefetch {
  /// The address formula for this prefetch as returned by ScalarEvolution.
  const SCEVAddRecExpr *LSCEVAddRec;

  /// The point of insertion for the prefetch instruction.
  Instruction *InsertPt = nullptr;

  /// True if targeted by a write.
  bool Writes = false;

  /// The (first seen) prefetched instruction.
  Instruction *MemI = nullptr;

  /// Constructor to create a new Prefetch for \p I.
  Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
    addInstruction(I);
  };

  /// Add the instruction \p I to this prefetch. If it's not the first one,
  /// 'InsertPt' and 'Writes' will be updated as required.
  /// \param PtrDiff the known constant address difference to the first added
  /// instruction.
  void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
                      int64_t PtrDiff = 0) {
    if (!InsertPt) {
      MemI = I;
      InsertPt = I;
      Writes = isa<StoreInst>(I);
    } else {
      BasicBlock *PrefBB = InsertPt->getParent();
      BasicBlock *InsBB = I->getParent();
      if (PrefBB != InsBB) {
        BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
        if (DomBB != PrefBB)
          InsertPt = DomBB->getTerminator();
      }

      if (isa<StoreInst>(I) && PtrDiff == 0)
        Writes = true;
    }
  }
};

bool LoopDataPrefetch::runOnLoop(Loop *L) {
  bool MadeChange = false;

  // Only prefetch in the inner-most loop.
  if (!L->isInnermost())
    return MadeChange;

  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(L, AC, EphValues);

  // Calculate the number of iterations ahead to prefetch.
  CodeMetrics Metrics;
  bool HasCall = false;
  for (const auto BB : L->blocks()) {
    // If the loop already has prefetches, then assume that the user knows
    // what they are doing and don't add any more.
    for (auto &I : *BB) {
      if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
          if (F->getIntrinsicID() == Intrinsic::prefetch)
            return MadeChange;
          if (TTI->isLoweredToCall(F))
            HasCall = true;
        } else { // Indirect call, assume it is lowered to a call.
          HasCall = true;
        }
      }
    }
    Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
  }

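  // Derive how many iterations ahead to prefetch from the target's prefetch
  // distance and the size of the loop body, and bail out if that exceeds the
  // target's limit or the loop's known maximum trip count.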
  if (!Metrics.NumInsts.isValid())
    return MadeChange;

  unsigned LoopSize = *Metrics.NumInsts.getValue();
  if (!LoopSize)
    LoopSize = 1;

  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
  if (!ItersAhead)
    ItersAhead = 1;

  if (ItersAhead > getMaxPrefetchIterationsAhead())
    return MadeChange;

  unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
  if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
    return MadeChange;

  unsigned NumMemAccesses = 0;
  unsigned NumStridedMemAccesses = 0;
  SmallVector<Prefetch, 16> Prefetches;
  for (const auto BB : L->blocks())
    for (auto &I : *BB) {
      Value *PtrValue;
      Instruction *MemI;

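      // Classify the access: loads are always prefetch candidates; stores
      // only when write prefetching is enabled.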
      if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
        MemI = LMemI;
        PtrValue = LMemI->getPointerOperand();
      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
        if (!doPrefetchWrites()) continue;
        MemI = SMemI;
        PtrValue = SMemI->getPointerOperand();
      } else continue;

      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
      if (!TTI->shouldPrefetchAddressSpace(PtrAddrSpace))
        continue;
      NumMemAccesses++;
      if (L->isLoopInvariant(PtrValue))
        continue;

      const SCEV *LSCEV = SE->getSCEV(PtrValue);
      const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
      if (!LSCEVAddRec)
        continue;
      NumStridedMemAccesses++;

      // We don't want to double prefetch individual cache lines. If this
      // access is known to be within one cache line of some other one that
      // has already been prefetched, then don't prefetch this one as well.
      bool DupPref = false;
      for (auto &Pref : Prefetches) {
        const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
        if (const SCEVConstant *ConstPtrDiff =
                dyn_cast<SCEVConstant>(PtrDiff)) {
          int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
          if (PD < (int64_t)TTI->getCacheLineSize()) {
            Pref.addInstruction(MemI, DT, PD);
            DupPref = true;
            break;
          }
        }
      }
      if (!DupPref)
        Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
    }

  unsigned TargetMinStride =
      getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                           Prefetches.size(), HasCall);

  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
                    << " iterations ahead (loop size: " << LoopSize << ") in "
                    << L->getHeader()->getParent()->getName() << ": " << *L);
  LLVM_DEBUG(dbgs() << "Loop has: " << NumMemAccesses << " memory accesses, "
                    << NumStridedMemAccesses << " strided memory accesses, "
                    << Prefetches.size() << " potential prefetch(es), "
                    << "a minimum stride of " << TargetMinStride << ", "
                    << (HasCall ? "calls" : "no calls") << ".\n");

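  // For each candidate, check the stride, materialize the address ItersAhead
  // iterations ahead (the current AddRec plus ItersAhead * step) with
  // SCEVExpander, and emit an llvm.prefetch intrinsic at the insertion point.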
  for (auto &P : Prefetches) {
    // Check if the stride of the accesses is large enough to warrant a
    // prefetch.
    if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
      continue;

    BasicBlock *BB = P.InsertPt->getParent();
    SCEVExpander SCEVE(*SE, BB->getDataLayout(), "prefaddr");
    const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
        SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
        P.LSCEVAddRec->getStepRecurrence(*SE)));
    if (!SCEVE.isSafeToExpand(NextLSCEV))
      continue;

    unsigned PtrAddrSpace = NextLSCEV->getType()->getPointerAddressSpace();
    Type *I8Ptr = PointerType::get(BB->getContext(), PtrAddrSpace);
    Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);

    IRBuilder<> Builder(P.InsertPt);
    Type *I32 = Type::getInt32Ty(BB->getContext());
    Builder.CreateIntrinsic(Intrinsic::prefetch, PrefPtrValue->getType(),
                            {PrefPtrValue, ConstantInt::get(I32, P.Writes),
                             ConstantInt::get(I32, 3),
                             ConstantInt::get(I32, 1)});
    ++NumPrefetches;
    LLVM_DEBUG(dbgs() << "  Access: "
                      << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
                      << ", SCEV: " << *P.LSCEVAddRec << "\n");
    ORE->emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
             << "prefetched memory access";
    });

    MadeChange = true;
  }

  return MadeChange;
}
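
// Usage sketch (illustrative, not part of the pass itself): with the new pass
// manager the transformation can be scheduled programmatically, assuming the
// standard PassManager headers; the helper name below is hypothetical.
//
//   #include "llvm/IR/PassManager.h"
//   #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
//
//   void addLoopDataPrefetch(llvm::FunctionPassManager &FPM) {
//     FPM.addPass(llvm::LoopDataPrefetchPass());
//   }
//
// From the command line, `opt -passes=loop-data-prefetch` runs the pass, and
// the -prefetch-distance, -min-prefetch-stride, -max-prefetch-iters-ahead and
// -loop-prefetch-writes options defined above override the target defaults.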