LLVM: lib/Analysis/LoopAccessAnalysis.cpp Source File
//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation for the loop memory dependence that was originally
// developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <variant>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
    VectorizationFactor("force-vector-width", cl::Hidden,
                        cl::desc("Sets the SIMD width. Zero is autoselect."),
                        cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true> VectorizationInterleave(
    "force-vector-interleave", cl::Hidden,
    cl::desc("Sets the vectorization interleave count. "
             "Zero is autoselect."),
    cl::location(VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///       A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

static cl::opt<unsigned> MaxForkedSCEVDepth(
    "max-forked-scev-depth", cl::Hidden,
    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
    cl::init(5));

static cl::opt<bool> SpeculateUnitStride(
    "laa-speculate-unit-stride", cl::Hidden,
    cl::desc("Speculate that non-constant strides are unit in LAA"),
    cl::init(true));

static cl::opt<bool, true> HoistRuntimeChecks(
    "hoist-runtime-checks", cl::Hidden,
    cl::desc(
        "Hoist inner loop runtime memory checks to outer loop if possible"),
    cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true));
bool VectorizerParams::HoistRuntimeChecks;

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(
    PredicatedScalarEvolution &PSE,
    const DenseMap<Value *, const SCEV *> &PtrToStride, Value *Ptr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);
  if (!StrideSCEV)
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  // Note: This assert is both overly strong and overly weak.  The actual
  // invariant here is that StrideSCEV should be loop invariant.  The only
  // such invariant strides we happen to speculate right now are unknowns
  // and thus this is a reasonable proxy of the actual invariant.
  assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");

  ScalarEvolution *SE = PSE.getSE();
  PSE.addPredicate(*SE->getEqualPredicate(
      StrideSCEV, SE->getOne(StrideSCEV->getType())));
  const SCEV *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, const RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()),
      NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
  Members.push_back(Index);
}
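
// Illustrative note (not part of the upstream file): each checking group
// tracks a [Low, High) byte range for its member pointers. For two groups A
// and B, the emitted runtime check is conceptually the interval-overlap test
// below; the names and values are hypothetical.
//
//   bool NoOverlap(uint64_t ALow, uint64_t AHigh,   // A's [Low, High)
//                  uint64_t BLow, uint64_t BHigh) { // B's [Low, High)
//     return AHigh <= BLow || BHigh <= ALow;        // disjoint ranges
//   }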

/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
/// return nullptr.
static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
                                     ScalarEvolution &SE) {
  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
    return nullptr;
  return SE.getAddExpr(A, B);
}

/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
/// return nullptr.
static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
                                   ScalarEvolution &SE) {
  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
    return nullptr;
  return SE.getMulExpr(A, B);
}
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR
/// at \p MaxBTC is guaranteed inbounds of the accessed object.
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
    const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
    ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
    AssumptionCache *AC,
    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
  auto *PointerBase = SE.getPointerBase(AR->getStart());
  auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
  if (!StartPtr)
    return false;
  const Loop *L = AR->getLoop();
  bool CheckForNonNull, CheckForFreed;
  Value *StartPtrV = StartPtr->getValue();
  uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
      DL, CheckForNonNull, CheckForFreed);

  if (DerefBytes && (CheckForNonNull || CheckForFreed))
    return false;

  const SCEV *Step = AR->getStepRecurrence(SE);
  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
  const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);

  // Check if we have a suitable dereferenceable assumption we can use.
  Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
  if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
    if (isa<BranchInst>(LoopPred->getTerminator()))
      CtxI = LoopPred->getTerminator();
  }
  if (!StartPtrV->canBeFreed()) {
    RetainedKnowledge DerefRK;
    getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
                         [&](RetainedKnowledge RK, Instruction *Assume, auto) {
                           if (!isValidAssumeForContext(Assume, CtxI, DT))
                             return false;
                           if (RK.AttrKind != Attribute::Dereferenceable)
                             return false;
                           DerefRK = std::max(DerefRK, RK);
                           return true;
                         });
    if (DerefRK) {
      DerefBytesSCEV = SE.getUMaxExpr(
          DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
    }
  }

  if (DerefBytesSCEV->isZero())
    return false;

  bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
  if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
    return false;

  Step = SE.getNoopOrSignExtend(Step, WiderTy);
  MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);

  // For the computations below, make sure they don't unsigned wrap.
  if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
    return false;
  const SCEV *StartOffset = SE.getNoopOrZeroExtend(
      SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);

  if (!LoopGuards)
    LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(L, SE));

  const SCEV *OffsetAtLastIter =
      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
  if (!OffsetAtLastIter) {
    // Re-try with constant max backedge-taken count if using the symbolic one
    // failed.
    MaxBTC = SE.getConstantMaxBackedgeTakenCount(L);
    if (isa<SCEVCouldNotCompute>(MaxBTC))
      return false;
    MaxBTC = SE.getNoopOrZeroExtend(
        MaxBTC, WiderTy);
    OffsetAtLastIter =
        mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
    if (!OffsetAtLastIter)
      return false;
  }

  const SCEV *OffsetEndBytes = addSCEVNoOverflow(
      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
  if (!OffsetEndBytes)
    return false;

  if (IsKnownNonNegative) {
    // For positive steps, check
    //  (AR->getStart() - StartPtr) + MaxBTC * Step + EltSize <= DerefBytes,
    // while making sure none of the computations unsigned wrap themselves.
    const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
    if (!EndBytes)
      return false;

    DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);
    return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
  }

  // For negative steps, check that the start offset is large enough to cover
  // all iterations, and that it stays within the dereferenceable bytes.
  return SE.isKnownPredicate(CmpInst::ICMP_UGE, StartOffset, OffsetEndBytes) &&
         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
}

std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
    const SCEV *MaxBTC, ScalarEvolution *SE,
    DenseMap<std::pair<const SCEV *, Type *>,
             std::pair<const SCEV *, const SCEV *>> *PointerBounds,
    DominatorTree *DT, AssumptionCache *AC,
    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
  std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
  if (PointerBounds) {
    auto [Iter, Ins] = PointerBounds->insert(
        {{PtrExpr, AccessTy},
         {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
    if (!Ins)
      return Iter->second;
    PtrBoundsPair = &Iter->second;
  }

  const SCEV *ScStart;
  const SCEV *ScEnd;

  auto &DL = Lp->getHeader()->getDataLayout();
  Type *IdxTy = DL.getIndexType(PtrExpr->getType());
  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
  if (SE->isLoopInvariant(PtrExpr, Lp)) {
    ScStart = ScEnd = PtrExpr;
  } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
    ScStart = AR->getStart();
    if (!isa<SCEVCouldNotCompute>(BTC))
      // Evaluating AR at an exact BTC is safe: LAA separately checks that
      // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
      // the loop either triggers UB when executing a memory access with a
      // poison pointer or the wrapping/poisoned pointer is not used.
      ScEnd = AR->evaluateAtIteration(BTC, *SE);
    else {
      // Evaluating AR at MaxBTC may wrap and create an expression that is less
      // than the start of the AddRec due to wrapping (for example consider
      // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
      // will get incremented by EltSize before returning, so this effectively
      // sets ScEnd to the maximum unsigned value for the type.
      if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
                                               DL, DT, AC, LoopGuards)) {
        ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
      } else {
        ScEnd = SE->getAddExpr(
            SE->getNegativeSCEV(EltSizeSCEV),
            SE->getSCEV(ConstantExpr::getIntToPtr(
                ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));
      }
    }
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Expressions with a non-constant step: use umin/umax to pick the
      // correct bounds, since we cannot tell the sign of the step at compile
      // time.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  } else
    return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};

  assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
  assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");

  // Add the size of the pointed element to ScEnd.
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
  if (PointerBounds)
    *PtrBoundsPair = Res;
  return Res;
}
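
// Illustrative example (assumption, not upstream code): for a pointer SCEV
// {%a,+,4}<%loop> with backedge-taken count n-1 and a 4-byte access type,
// the computed bounds are:
//   ScStart = %a
//   ScEnd   = %a + 4 * (n - 1) + 4 = %a + 4n   (EltSize added at the end)
// For a negative stride {%a,+,-4}, Start and End are swapped so that
// [ScStart, ScEnd) still describes the accessed range low-to-high.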

/// Calculate Start and End points of memory access using
/// getStartAndEndForAccess.
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                    Type *AccessTy, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    PredicatedScalarEvolution &PSE,
                                    bool NeedsFreeze) {
  const SCEV *BTC = PSE.getBackedgeTakenCount();
  const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
  const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
      Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
      &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
  assert(!isa<SCEVCouldNotCompute>(ScStart) &&
         !isa<SCEVCouldNotCompute>(ScEnd) &&
         "must be able to compute both start and end expressions");
  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
                        NeedsFreeze);
}

bool RuntimePointerChecking::tryToCreateDiffCheck(
    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
  // If either group contains multiple different pointers, bail out.
  // TODO: Support multiple pointers by using the minimum or maximum pointer,
  // depending on src & sink.
  if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
    return false;

  const PointerInfo *Src = &Pointers[CGI.Members[0]];
  const PointerInfo *Sink = &Pointers[CGJ.Members[0]];

  // If either pointer is read and written, multiple checks may be needed.
  // Bail out.
  if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
      !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
    return false;

  ArrayRef<unsigned> AccSrc =
      DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
  ArrayRef<unsigned> AccSink =
      DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
  // If either pointer is accessed multiple times, there may not be a clear
  // src/sink relation. Bail out for now.
  if (AccSrc.size() != 1 || AccSink.size() != 1)
    return false;

  // If the sink is accessed before src, swap src/sink.
  if (AccSink[0] < AccSrc[0])
    std::swap(Src, Sink);

  const SCEV *SrcStart;
  const SCEV *SinkStart;
  const SCEV *Step;
  const Loop *InnerLoop = DC.getInnermostLoop();
  if (!match(Src->Expr, m_scev_AffineAddRec(m_SCEV(SrcStart), m_SCEV(Step),
                                            m_SpecificLoop(InnerLoop))) ||
      !match(Sink->Expr,
             m_scev_AffineAddRec(m_SCEV(SinkStart), m_scev_Specific(Step),
                                 m_SpecificLoop(InnerLoop))))
    return false;

  SmallVector<Instruction *, 4> SrcInsts =
      DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
  SmallVector<Instruction *, 4> SinkInsts =
      DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
  Type *SrcTy = getLoadStoreType(SrcInsts[0]);
  Type *DstTy = getLoadStoreType(SinkInsts[0]);
  if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
    return false;

  const DataLayout &DL = InnerLoop->getHeader()->getDataLayout();
  unsigned AllocSize =
      std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));

  // Only matching constant steps matching the AllocSize are supported at the
  // moment. This simplifies the difference computation. Can be extended in
  // the future.
  const APInt *StepAPInt;
  if (!match(Step, m_scev_APInt(StepAPInt)) || StepAPInt->abs() != AllocSize)
    return false;

  IntegerType *IntTy =
      IntegerType::get(Src->PointerValue->getContext(),
                       DL.getPointerSizeInBits(CGI.AddressSpace));

  // When counting down, the dependence distance needs to be swapped.
  if (StepAPInt->isNegative())
    std::swap(SrcStart, SinkStart);

  const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcStart, IntTy);
  const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkStart, IntTy);
  if (isa<SCEVCouldNotCompute>(SrcStartInt) ||
      isa<SCEVCouldNotCompute>(SinkStartInt))
    return false;

  // If the start values for both Src and Sink also vary according to an outer
  // loop, then it's probably better to avoid creating diff checks because
  // they may not be hoisted. We should instead let llvm::addRuntimeChecks
  // also create a separate, outer runtime check.
  if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
      isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
    auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
    auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
    const Loop *StartARLoop = SrcStartAR->getLoop();
    if (StartARLoop == SinkStartAR->getLoop() &&
        StartARLoop == InnerLoop->getParentLoop() &&
        // If the diff check would already be loop invariant (due to the
        // recurrences being the same), then we prefer to keep the diff checks
        // because they are cheaper.
        SrcStartAR->getStepRecurrence(*SE) !=
            SinkStartAR->getStepRecurrence(*SE)) {
      LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
                           "cannot be hoisted out of the outer loop\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
                    << "SrcStart: " << *SrcStartInt << '\n'
                    << "SinkStartInt: " << *SinkStartInt << '\n');
  DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
                          Src->NeedsFreeze || Sink->NeedsFreeze);
  return true;
}
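
// Illustrative sketch (assumption): a "diff check" replaces the two-sided
// interval-overlap test with a single comparison of the start distance
// against the bytes accessed per vectorized step. With hypothetical integers
// SrcStart, SinkStart and VF iterations of AllocSize bytes each:
//
//   bool Independent(uint64_t SinkStart, uint64_t SrcStart,
//                    uint64_t AllocSize, uint64_t VF) {
//     return SinkStart - SrcStart >= VF * AllocSize; // unsigned compare
//   }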

SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ)) {
        CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
        Checks.emplace_back(&CGI, &CGJ);
      }
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (const auto &I : M.Members)
    for (const auto &J : N.Members)
      if (needsChecking(I, J))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
  if (!Diff)
    return nullptr;
  return Diff->isNegative() ? J : I;
}

bool RuntimeCheckingPtrGroup::addPointer(
    unsigned Index, const RuntimePointerChecking &RtCheck) {
  return addPointer(
      Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
      RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
      RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         bool NeedsFreeze,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memory checks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  this->NeedsFreeze |= NeedsFreeze;
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates' equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers.
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memory check. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.
  //
  // Within each equivalence class we greedily merge pointers into
  // RuntimeCheckingPtrGroups whose [Low, High) bounds can be combined, so
  // that one group-vs-group comparison covers many pointer pairs.

  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.emplace_back(I, *this);
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue].push_back(Index);

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and it's already been grouped.
    if (Seen.contains(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto M : DepCands.members(Access)) {
      auto PointerI = PositionMap.find(M.getPointer());
      // If we can't find the pointer in PositionMap that means we didn't
      // create a CheckPtr for it, as we won't create a runtime check for it.
      if (PointerI == PositionMap.end())
        continue;
      for (unsigned Pointer : PointerI->second) {
        bool Merged = false;
        // Mark this pointer as seen.
        Seen.insert(Pointer);

        // Go through all the existing sets and see if we can find one
        // which can include this pointer.
        for (RuntimeCheckingPtrGroup &Group : Groups) {
          // Don't perform more than a certain amount of comparisons.
          // This should limit the cost of grouping the pointers to something
          // reasonable.  If we do end up hitting this threshold, the algorithm
          // will create separate groups for all remaining pointers.
          if (TotalComparisons > MemoryCheckMergeThreshold)
            break;

          TotalComparisons++;

          if (Group.addPointer(Pointer, *this)) {
            Merged = true;
            break;
          }
        }

        if (!Merged)
          // We couldn't add this pointer to any existing set or the threshold
          // for the number of comparisons has been reached. Create a new
          // group to hold the current pointer.
          Groups.emplace_back(Pointer, *this);
      }
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}
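
// Illustrative note (assumption): grouping reduces the number of emitted
// comparisons. With 4 pointers merged into 2 groups instead of kept as 4
// singletons, the pair count drops from C(4,2) = 6 pointer-vs-pointer checks
// to a single group-vs-group check, since members of one group never need
// checking against each other.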

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  return PointerI.AliasSetId == PointerJ.AliasSetId;
}

/// Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
static DenseMap<const RuntimeCheckingPtrGroup *, unsigned>
getPtrToIdxMap(ArrayRef<RuntimeCheckingPtrGroup> CheckingGroups) {
  DenseMap<const RuntimeCheckingPtrGroup *, unsigned> PtrIndices;
  for (const auto &[Idx, CG] : enumerate(CheckingGroups))
    PtrIndices[&CG] = Idx;
  return PtrIndices;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  auto PtrIndices = getPtrToIdxMap(CheckingGroups);
  for (const auto &[Check1, Check2] : Checks) {
    const auto &First = Check1->Members, &Second = Check2->Members;
    OS.indent(Depth) << "Check " << N++ << ":\n";
    OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)
                         << ":\n";
    for (unsigned K : First)
      OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
    OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)
                         << ":\n";
    for (unsigned K : Second)
      OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  auto PtrIndices = getPtrToIdxMap(CheckingGroups);
  for (const auto &CG : CheckingGroups) {
    OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned Member : CG.Members) {
      OS.indent(Depth + 6) << *Pointers[Member].Expr << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run-time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef MemoryDepChecker::MemAccessInfo MemAccessInfo;
  typedef MemoryDepChecker::MemAccessInfoList MemAccessInfoList;

  AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
                 DominatorTree &DT, MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE,
                 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
      : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
        PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
    // We're analyzing dependences across loop iterations.
    BAA.enableCrossIterationMode();
  }

  /// Register a load and whether it is only read from.
  void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(adjustLoc(Loc));
    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(const MemoryLocation &Loc, Type *AccessTy) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(adjustLoc(Loc));
    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access, Type *AccessTy,
                            const DenseMap<Value *, const SCEV *> &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
                       const DenseMap<Value *, const SCEV *> &Strides,
                       Value *&UncomputablePtr, bool AllowPartial);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis
  /// without dependency checking (i.e. ShouldRetryWithRuntimeChecks).
  bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; }

private:
  typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

  /// Adjust the MemoryLocation so that it represents accesses to this
  /// location across all iterations, rather than a single one.
  MemoryLocation adjustLoc(MemoryLocation Loc) const {
    // The accessed location varies within the loop, but remains within the
    // underlying object.
    Loc.Size = LocationSize::beforeOrAfterPointer();
    Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
    Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
    return Loc;
  }

  /// Drop alias scopes that are only valid within a single loop iteration.
  MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
    if (!ScopeList)
      return nullptr;

    // For the sake of simplicity, drop the whole scope list if any scope is
    // iteration-local.
    if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
          return LoopAliasScopes.contains(cast<MDNode>(Scope));
        }))
      return nullptr;

    return ScopeList;
  }

  /// Go over all memory accesses and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Map of all accesses. Values are the types used to access memory
  /// pointed to by the pointer.
  PtrAccessMap Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value *, 16> ReadOnlyPtr;

  /// Batched alias analysis results.
  BatchAAResults BAA;

  /// An alias set tracker to partition the access set by underlying object
  /// and intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  /// The LoopInfo of the function.
  const LoopInfo *LI;

  DominatorTree &DT;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really
  /// need a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary
  /// checks.
  bool IsRTCheckAnalysisNeeded = false;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;

  DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;

  /// Alias scopes that are declared inside the loop, and as such not valid
  /// across iterations.
  SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
};

} // end anonymous namespace

/// Try to compute a constant stride for \p AR. Used by getPtrStride and
/// isNoWrap.
static std::optional<int64_t>
getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
                    Value *Ptr, PredicatedScalarEvolution &PSE) {
  if (isa<ScalableVectorType>(AccessTy)) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
                      << "\n");
    return std::nullopt;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG({
      dbgs() << "LAA: Bad stride - Not striding over innermost loop ";
      if (Ptr)
        dbgs() << *Ptr << " ";

      dbgs() << "SCEV: " << *AR << "\n";
    });
    return std::nullopt;
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const APInt *APStepVal;
  if (!match(Step, m_scev_APInt(APStepVal))) {
    LLVM_DEBUG({
      dbgs() << "LAA: Bad stride - Not a constant strided ";
      if (Ptr)
        dbgs() << *Ptr << " ";
      dbgs() << "SCEV: " << *AR << "\n";
    });
    return std::nullopt;
  }

  const auto &DL = Lp->getHeader()->getDataLayout();
  TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
  int64_t Size = AllocSize.getFixedValue();

  // Huge step value - give up.
  std::optional<int64_t> StepVal = APStepVal->trySExtValue();
  if (!StepVal)
    return std::nullopt;

  // Strided access.
  return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);
}
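
// Worked example (assumption): for a 4-byte access type and an AddRec step
// of -8 bytes, StepVal = -8 and Size = 4, so -8 % 4 == 0 and the returned
// stride is -8 / 4 = -2 elements per iteration. A step of 6 bytes would
// return std::nullopt, because 6 % 4 != 0 means the access is not a whole
// number of elements apart each iteration.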

/// Check whether \p AR is a non-wrapping AddRec. If \p Ptr is not nullptr,
/// use \p Ptr's type to determine whether \p AR can wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
                     Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
                     const DominatorTree &DT,
                     std::optional<int64_t> Stride = std::nullopt) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  if (PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted. An nusw getelementptr that is an AddRec cannot wrap per
  // definition: if it did, the result would be poison and any memory access
  // dependent on it would be immediate UB when executed.
  if (auto *GEP = dyn_cast_if_present<GetElementPtrInst>(Ptr);
      GEP && GEP->hasNoUnsignedSignedWrap()) {
    // For the above reasoning to apply, the pointer must be dereferenced in
    // every iteration.
    if (L->getHeader() == L->getLoopLatch() ||
        any_of(GEP->users(), [L, GEP, &DT](User *U) {
          if (getLoadStorePointerOperand(U) != GEP)
            return false;
          BasicBlock *UserBB = cast<Instruction>(U)->getParent();
          return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);
        }))
      return true;
  }

  if (!Stride)
    Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
  if (Stride) {
    // If the null pointer is undefined, then a access sequence which would
    // otherwise access it would be UB, so a unit-stride AddRec cannot wrap.
    if (!NullPointerIsDefined(L->getHeader()->getParent(),
                              AR->getType()->getPointerAddressSpace()) &&
        (Stride == 1 || Stride == -1))
      return true;
  }

  if (Ptr && Assume) {
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
                      << "LAA: Pointer: " << *Ptr << "\n"
                      << "LAA: SCEV: " << *AR << "\n"
                      << "LAA: Added an overflow assumption\n");
    return true;
  }

  return false;
}

static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *> WorkList;
  WorkList.push_back(StartPtr);

  while (!WorkList.empty()) {
    Value *Ptr = WorkList.pop_back_val();
    if (!Visited.insert(Ptr).second)
      continue;
    auto *PN = dyn_cast<PHINode>(Ptr);
    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    if (PN && InnermostLoop.contains(PN->getParent()) &&
        PN->getParent() != InnermostLoop.getHeader()) {
      append_range(WorkList, PN->incoming_values());
    } else
      AddPointer(Ptr);
  }
}

// Walk back through the IR for a pointer, looking for a select like the
// following:
//
//  %offset = select i1 %cmp, i64 %a, i64 %b
//  %addr = getelementptr double, double* %base, i64 %offset
//  %ld = load double, double* %addr, align 8
//
// We won't be able to form a single SCEVAddRecExpr from this since the
// address for each loop iteration depends on %cmp. We could potentially
// produce multiple valid SCEVAddRecExprs, though, and check all of them for
// memory safety/aliasing if needed.
//
// If we encounter some IR we don't yet handle, or something obviously fine
// like a constant, then we just add the SCEV for that term to the list passed
// in by the caller. If we have a node that may potentially yield a valid
// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
// ourselves before adding to the list.
static void findForkedSCEVs(
    ScalarEvolution *SE, const Loop *L, Value *Ptr,
    SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,
    unsigned Depth) {
  // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
  // we've exceeded our limit on recursion, just return whatever we have
  // regardless of whether it can be used for a forked pointer or not, along
  // with an indication of whether it might be a poison or undef value.
  const SCEV *Scev = SE->getSCEV(Ptr);
  if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
      !isa<Instruction>(Ptr) || Depth == 0) {
    ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
    return;
  }

  Depth--;

  auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
    return get<1>(S);
  };

  auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
    switch (Opcode) {
    case Instruction::Add:
      return SE->getAddExpr(L, R);
    case Instruction::Sub:
      return SE->getMinusSCEV(L, R);
    default:
      llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
    }
  };

  Instruction *I = cast<Instruction>(Ptr);
  unsigned Opcode = I->getOpcode();
  switch (Opcode) {
  case Instruction::GetElementPtr: {
    auto *GEP = cast<GetElementPtrInst>(I);
    Type *SourceTy = GEP->getSourceElementType();
    // We only handle base + single offset GEPs here for now.
    // Not dealing with preexisting gathers yet, so no vectors.
    if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
      ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
      break;
    }
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> BaseScevs;
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> OffsetScevs;
    findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);

    // See if we need to freeze our fork...
    bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
                       any_of(OffsetScevs, UndefPoisonCheck);

    // Check that we only have a single fork, on either the base or the
    // offset. Copy the SCEV across for the one without a fork in order to
    // generate the full SCEV for both sides of the GEP.
    if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
      BaseScevs.push_back(BaseScevs[0]);
    else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
      OffsetScevs.push_back(OffsetScevs[0]);
    else {
      ScevList.emplace_back(Scev, NeedsFreeze);
      break;
    }

    Type *IntPtrTy = SE->getEffectiveSCEVType(GEP->getPointerOperandType());

    // Find the size of the type being pointed to. We only have a single
    // index term (guarded above) so we don't need to index into arrays or
    // structures, only the type requested.
    const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);

    // Scale up the offsets by the size of the type, then add to the bases.
    for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {
      const SCEV *Base = get<0>(B);
      const SCEV *Offset = SE->getTruncateOrSignExtend(get<0>(O), IntPtrTy);
      const SCEV *Scaled = SE->getMulExpr(Size, Offset);
      ScevList.emplace_back(SE->getAddExpr(Base, Scaled), NeedsFreeze);
    }
    break;
  }
  case Instruction::Select: {
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
    // A select means we've found a forked pointer, but we currently only
    // support a single select per pointer so if there's another behind this
    // then we just bail out and return the generic SCEV.
    findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
    if (ChildScevs.size() == 2)
      append_range(ScevList, ChildScevs);
    else
      ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
    break;
  }
  case Instruction::PHI: {
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
    // A phi means we've found a forked pointer, but we currently only
    // support a single phi per pointer so if there's another behind this
    // then we just bail out and return the generic SCEV.
    if (I->getNumOperands() == 2) {
      findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
      findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
    }
    if (ChildScevs.size() == 2)
      append_range(ScevList, ChildScevs);
    else
      ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
    break;
  }
  case Instruction::Add:
  case Instruction::Sub: {
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> LScevs;
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> RScevs;
    findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);

    // See if we need to freeze our fork...
    bool NeedsFreeze =
        any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);

    // Check that we only have a single fork, on either the left or right
    // side. Copy the SCEV across for the one without a fork in order to
    // generate the full SCEV for both sides of the BinOp.
    if (LScevs.size() == 2 && RScevs.size() == 1)
      RScevs.push_back(RScevs[0]);
    else if (RScevs.size() == 2 && LScevs.size() == 1)
      LScevs.push_back(LScevs[0]);
    else {
      ScevList.emplace_back(Scev, NeedsFreeze);
      break;
    }

    for (auto [L, R] : zip(LScevs, RScevs))
      ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),
                            NeedsFreeze);
    break;
  }
  default:
    // Just return the current SCEV if we haven't handled the instruction yet.
    LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
    ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
    break;
  }
}
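
// Illustrative IR (assumption): a forked pointer of the kind this function
// decomposes.
//
//   %offset = select i1 %cmp, i64 %a, i64 %b
//   %gep = getelementptr double, ptr %base, i64 %offset
//   %ld = load double, ptr %gep
//
// findForkedSCEVs returns two SCEVs, one per select arm, so that each arm
// can be bounds-checked separately at runtime.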

bool AccessAnalysis::createCheckForAccess(
    RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
    const DenseMap<Value *, const SCEV *> &StridesMap,
    DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
    unsigned &RunningDepId, unsigned ASId, bool Assume) {
  Value *Ptr = Access.getPointer();

  SmallVector<PointerIntPair<const SCEV *, 1, bool>> RTCheckPtrs;
  findForkedSCEVs(PSE.getSE(), TheLoop, Ptr, RTCheckPtrs, MaxForkedSCEVDepth);
  assert(!RTCheckPtrs.empty() &&
         "Must have some runtime-check pointer candidates");

  // Only use forked pointers if we have exactly two candidates, each either
  // loop-invariant or an AddRec; otherwise fall back to the plain SCEV.
  auto IsLoopInvariantOrAR =
      [this, TheLoop](PointerIntPair<const SCEV *, 1, bool> P) {
        return PSE.getSE()->isLoopInvariant(P.getPointer(), TheLoop) ||
               isa<SCEVAddRecExpr>(P.getPointer());
      };
  if (RTCheckPtrs.size() == 2 && all_of(RTCheckPtrs, IsLoopInvariantOrAR)) {
    LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n";
               for (const auto &[Idx, Q] : enumerate(RTCheckPtrs)) dbgs()
               << "\t(" << Idx << ") " << *Q.getPointer() << "\n");
  } else {
    RTCheckPtrs = {{PSE.getSCEV(Ptr), false}};
  }

  // Check whether all pointer candidates can participate in a runtime bounds
  // check. They must either be invariant or non-wrapping affine AddRecs.
  for (auto &P : RTCheckPtrs) {
    // The bounds for a loop-invariant pointer are trivial.
    if (PSE.getSE()->isLoopInvariant(P.getPointer(), TheLoop))
      continue;

    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
    if (!AR && Assume)
      AR = PSE.getAsAddRec(Ptr);
    if (!AR || !AR->isAffine())
      return false;

    // If there's only one option for Ptr, look it up after bounds and wrap
    // checking, because assumptions might have been added to PSE.
    if (RTCheckPtrs.size() == 1) {
      AR = cast<SCEVAddRecExpr>(
          replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr));
      P.setPointer(AR);
    }

    if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
                  TheLoop, Assume, DT))
      return false;
  }

  for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) {
    // The id of the dependence set.
    unsigned DepId;

    if (isDependencyCheckNeeded()) {
      Value *Leader = DepCands.getLeaderValue(Access).getPointer();
      unsigned &LeaderId = DepSetId[Leader];
      if (!LeaderId)
        LeaderId = RunningDepId++;
      DepId = LeaderId;
    } else
      // Each access has its own dependence set.
      DepId = RunningDepId++;

    bool IsWrite = Access.getInt();
    RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
                   NeedsFreeze);
    LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
  }

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(
    RuntimePointerChecking &RtCheck, Loop *TheLoop,
    const DenseMap<Value *, const SCEV *> &StridesMap,
    Value *&UncomputablePtr, bool AllowPartial) {
  // Find pointers with computable bounds. We are going to use this
  // information to place a runtime bound check.
  bool CanDoRT = true;

  bool MayNeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to access from different alias sets.
  // Accesses between different groups doesn't need to be checked.
  unsigned ASId = 0;
  for (const auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;
    ++ASId;
    auto ASPointers = AS.getPointers();

    // We assign consecutive id to access from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;

    // First, count how many write and read accesses are in the alias set.
    // Also collect MemAccessInfos for later.
    SmallVector<MemAccessInfo, 4> AccessInfos;
    for (const Value *ConstPtr : ASPointers) {
      Value *Ptr = const_cast<Value *>(ConstPtr);
      bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));
      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;
      AccessInfos.emplace_back(Ptr, IsWrite);
    }

    // We do not need runtime checks for this alias set, if there are no
    // writes or a single write and no reads.
    if (NumWritePtrChecks == 0 ||
        (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
      assert((ASPointers.size() <= 1 ||
              all_of(ASPointers,
                     [this](const Value *Ptr) {
                       MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
                                                 true);
                       return !DepCands.contains(AccessWrite);
                     })) &&
             "Can only skip updating CanDoRT below, if all entries in AS "
             "are reads or there is at most 1 entry");
      continue;
    }

    for (auto &Access : AccessInfos) {
      for (const auto &AccessTy : Accesses[Access]) {
        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                  DepSetId, TheLoop, RunningDepId, ASId,
                                  false)) {
          LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                            << *Access.getPointer() << '\n');
          Retries.emplace_back(Access, AccessTy);
          CanDoAliasSetRT = false;
        }
      }
    }

    // Note that this function computes CanDoRT and MayNeedRTCheck
    // independently. For example CanDoRT=false, MayNeedRTCheck=false means
    // that we have a pointer for which we couldn't find the bounds but we
    // don't actually need to emit any checks so it does not matter.
    //
    // We need runtime checks for this alias set, if there are at least 2
    // dependence sets (in which case RunningDepId > 2) or if we need to
    // re-try any bound checks (because in that case the number of dependence
    // sets is incomplete).
    bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (const auto &[Access, AccessTy] : Retries) {
        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                  DepSetId, TheLoop, RunningDepId, ASId,
                                  true)) {
          CanDoAliasSetRT = false;
          UncomputablePtr = Access.getPointer();
          if (!AllowPartial)
            break;
        }
      }
    }

    CanDoRT &= CanDoAliasSetRT;
    MayNeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have
  // different address spaces, assume the values aren't directly comparable,
  // so we can't use them for the runtime check. We also have to assume they
  // could overlap.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (MayNeedRTCheck && (CanDoRT || AllowPartial))
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  // If we can do run-time checks, but there are no checks, no runtime checks
  // are needed. This can happen when all pointers point to the same
  // underlying object for example.
  RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;

  bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
  assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
         "CanDoRTIfNeeded depends on RtCheck.Need");
  if (!CanDoRTIfNeeded && !AllowPartial)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "LAA:   AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (const auto &[A, _] : Accesses)
      dbgs() << "\t" << *A.getPointer() << " ("
             << (A.getInt()
                     ? "write"
                     : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"
                                                             : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result,
  // we only need to check for potential pointer dependencies within each
  // alias set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves used
    // ordered collections internally and so the iteration order here is
    // deterministic.
    auto ASPointers = AS.getPointers();

    bool SetHasWrite = false;

    // Map of (pointer to underlying objects, accessed address space) to last
    // access encountered.
    typedef DenseMap<std::pair<const Value *, unsigned>, MemAccessInfo>
        UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessMap DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write
    // pointers, and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const Value *ConstPtr : ASPointers) {
        Value *Ptr = const_cast<Value *>(ConstPtr);

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for
        // CheckDeps.
        for (const auto &[AC, _] : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.contains(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            // We only use the pointer keys, the types vector values don't
            // matter.
            DeferredAccesses.insert({Access, {}});
            continue;
          }

          // If this is a write - check other reads and writes for conflicts.
          // If this is a read only check other writes for conflicts (but
          // only if there is no other write to the ptr - this is an
          // optimization to catch "a[i] = a[i] + " without having to do a
          // dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
          UOs = {};
          ::getUnderlyingObjects(Ptr, UOs, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : UOs) {
            // nullptr never aliases; don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            auto [It, Inserted] = ObjToLastAccess.try_emplace(
                {UnderlyingObj,
                 cast<PointerType>(Ptr->getType())->getAddressSpace()},
                Access);
            if (!Inserted) {
              DepCands.unionSets(Access, It->second);
              It->second = Access;
            }

            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

/// Check whether the access through \p Ptr has a constant stride.
std::optional<int64_t>
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
                   const Loop *Lp, const DominatorTree &DT,
                   const DenseMap<Value *, const SCEV *> &StridesMap,
                   bool Assume, bool ShouldCheckWrap) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
    return 0;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return std::nullopt;
  }

  std::optional<int64_t> Stride =
      getStrideFromAddRec(AR, Lp, AccessTy, Ptr, PSE);
  if (!ShouldCheckWrap || !Stride)
    return Stride;

  if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))
    return Stride;

  LLVM_DEBUG(
      dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
             << *Ptr << " SCEV: " << *AR << "\n");
  return std::nullopt;
}

std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
                                             Type *ElemTyB, Value *PtrB,
                                             const DataLayout &DL,
                                             ScalarEvolution &SE,
                                             bool StrictCheck, bool CheckType) {
  assert(PtrA && PtrB && "Expected non-nullptr pointers.");

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return 0;

  // Make sure that the element types are the same if required.
  if (CheckType && ElemTyA != ElemTyB)
    return std::nullopt;

  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();

  // Check that the address spaces match.
  if (ASA != ASB)
    return std::nullopt;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  const Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(
      DL, OffsetA, /*AllowNonInbounds=*/true);
  const Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(
      DL, OffsetB, /*AllowNonInbounds=*/true);

  std::optional<int64_t> Val;
  if (PtrA1 == PtrB1) {
    // Retrieve the address space again as pointer stripping now tracks
    // through `addrspacecast`.
    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
    // Check that the address spaces match and that the pointers are valid.
    if (ASA != ASB)
      return std::nullopt;

    IdxWidth = DL.getIndexSizeInBits(ASA);
    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
    OffsetB = OffsetB.sextOrTrunc(IdxWidth);

    OffsetB -= OffsetA;
    Val = OffsetB.getSExtValue();
  } else {
    // Otherwise compute the distance with SCEV between the base pointers.
    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
    std::optional<APInt> Diff =
        SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
    if (!Diff)
      return std::nullopt;
    Val = Diff->trySExtValue();
  }

  if (!Val)
    return std::nullopt;

  int64_t Size = DL.getTypeStoreSize(ElemTyA);
  int64_t Dist = *Val / Size;

  // Ensure that the calculated distance matches the type-based one after all
  // the bitcasts removal in the provided pointers.
  if (!StrictCheck || Dist * Size == *Val)
    return Dist;
  return std::nullopt;
}

bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
                           const DataLayout &DL, ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];

  using DistOrdPair = std::pair<int64_t, unsigned>;
  auto Compare = llvm::less_first();
  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
  Offsets.emplace(0, 0);
  bool IsConsecutive = true;
  for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
    std::optional<int64_t> Diff =
        getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
                        /*StrictCheck=*/true);
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = *Diff;
    auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
    if (!IsInserted)
      return false;
    // Consecutive order if the inserted element is the last one.
    IsConsecutive &= std::next(It) == Offsets.end();
  }
  SortedIndices.clear();
  if (!IsConsecutive) {
    // Fill SortedIndices with the order of the accesses.
    SortedIndices.resize(VL.size());
    for (auto [Idx, Off] : enumerate(Offsets))
      SortedIndices[Idx] = Off.second;
  }
  return true;
}

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  if (!PtrA || !PtrB)
    return false;
  Type *ElemTyA = getLoadStoreType(A);
  Type *ElemTyB = getLoadStoreType(B);
  std::optional<int64_t> Diff =
      getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
                      /*StrictCheck=*/true, CheckType);
  return Diff == 1;
}

void MemoryDepChecker::addAccess(StoreInst *SI) {
  visitPointers(SI->getPointerOperand(), *InnermostLoop,
                [this, SI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                  InstMap.push_back(SI);
                  ++AccessIdx;
                });
}

void MemoryDepChecker::addAccess(LoadInst *LI) {
  visitPointers(LI->getPointerOperand(), *InnermostLoop,
                [this, LI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                  InstMap.push_back(LI);
                  ++AccessIdx;
                });
}

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
  case IndirectUnsafe:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
  case IndirectUnsafe:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown || Type == IndirectUnsafe;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case IndirectUnsafe:
  case Backward:
  case BackwardVectorizable:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize,
                                                    unsigned CommonStride) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;

  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
      std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
               MaxStoreLoadForwardSafeDistanceInBits);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize;
       VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
    // If the number of vector iterations between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (CommonStride &&
      MaxVFWithoutSLForwardIssuesPowerOf2 <
          MaxStoreLoadForwardSafeDistanceInBits &&
      MaxVFWithoutSLForwardIssuesPowerOf2 !=
          VectorizerParams::MaxVectorWidth * TypeByteSize) {
    uint64_t MaxVF =
        bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
    uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
    MaxStoreLoadForwardSafeDistanceInBits =
        std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
  }
  return false;
}
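
// Worked example (assumption): TypeByteSize = 4 and Distance = 12 bytes.
// The candidate VF of 8 bytes already fails (12 % 8 != 0 and 12 / 8 = 1 <
// 32 = NumItersForStoreLoadThroughMemory), so the largest conflict-free
// power-of-2 width is 4 bytes, i.e. a single element. That is below
// 2 * TypeByteSize, so the function reports that this distance could
// prevent store-to-load forwarding and returns true.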

void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  if (Status < S)
    Status = S;
}

/// Given a dependence-distance \p Dist between two memory accesses, that have
/// strides in the same direction whose absolute value of the maximum stride
/// is given in \p MaxStride, in a loop whose maximum backedge taken count is
/// \p MaxBTC, check if it is possible to prove statically that the dependence
/// distance is larger than the range that the accesses will travel through
/// the execution of the loop. If so, return true; false otherwise. This is
/// useful for example in loops such as the following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &MaxBTC, const SCEV &Dist,
                                     uint64_t MaxStride) {
  // If we can prove that
  //      (**) |Dist| > MaxBTC * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > MaxBTC). This is equivalent to the
  // Strong SIV Test (Practical Dependence Testing, Section 4.2.1). Note that
  // for vectorization it is sufficient to prove that the dependence distance
  // is >= VF; but in some cases we can prune dependence distances early, even
  // before selecting the VF, and without a runtime test, by comparing the
  // distance against the loop iteration count. Since the vectorized code will
  // be executed only if LoopCount >= VF, proving distance >= LoopCount also
  // guarantees that distance >= VF.
  const uint64_t ByteStride = MaxStride;
  const SCEV *Step = SE.getConstant(Dist.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSizeBits > ProductTypeSizeBits)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is  Dist - (MaxBTC * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is  -Dist - (MaxBTC * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  return SE.isKnownPositive(Minus);
}

/// Check the dependence for two accesses with the same, scaled stride
/// \p Stride in bytes. \p Distance is the positive distance in bytes, and
/// \p TypeByteSize is the type size in bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not multiple of type byte size.
  if (Distance % TypeByteSize)
    return false;

  // No dependence if the distance is not multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |
  return Distance % Stride;
}
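
// Worked example (assumption): Distance = 24 bytes, byte stride = 12,
// TypeByteSize = 4. 24 % 4 == 0 and 24 % 12 == 0, so the function returns 0
// (dependent): every second iteration lands on a previously accessed
// element. With Distance = 16 instead, 16 % 12 == 4 != 0, so the two access
// streams interleave without ever colliding and the non-zero result means
// independent.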

bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
                                                          Type *SrcTy,
                                                          const SCEV *Sink,
                                                          Type *SinkTy) {
  const SCEV *BTC = PSE.getBackedgeTakenCount();
  const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
  ScalarEvolution &SE = *PSE.getSE();
  const auto &[SrcStart_, SrcEnd_] =
      getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
                              &SE, &PointerBounds, DT, AC, LoopGuards);
  if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
    return false;

  const auto &[SinkStart_, SinkEnd_] =
      getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
                              &SE, &PointerBounds, DT, AC, LoopGuards);
  if (isa<SCEVCouldNotCompute>(SinkStart_) ||
      isa<SCEVCouldNotCompute>(SinkEnd_))
    return false;

  if (!LoopGuards)
    LoopGuards.emplace(
        ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));

  auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards);
  auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
  if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
    return true;

  auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
  auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
  return SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
}

std::variant<MemoryDepChecker::Dependence::DepType,
             MemoryDepChecker::DepDistanceStrideAndSizeInfo>
MemoryDepChecker::getDependenceDistanceStrideAndSize(
    const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
    const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
  const auto &DL = InnermostLoop->getHeader()->getDataLayout();
  auto &SE = *PSE.getSE();
  const auto &[APtr, AIsWrite] = A;
  const auto &[BPtr, BIsWrite] = B;

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  Type *ATy = getLoadStoreType(AInst);
  Type *BTy = getLoadStoreType(BInst);

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  std::optional<int64_t> StrideAPtr = getPtrStride(
      PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);
  std::optional<int64_t> StrideBPtr = getPtrStride(
      PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of
  // the dependence when measuring the distance between them. We should not
  // swap AIsWrite with BIsWrite, as their uses expect them in program order.
  if (StrideAPtr && *StrideAPtr < 0) {
    std::swap(Src, Sink);
    std::swap(AInst, BInst);
    std::swap(ATy, BTy);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = SE.getMinusSCEV(Sink, Src);

  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                    << "\n");
  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
                    << ": " << *Dist << "\n");

  // If either Src or Sink is not a strided, non-wrapping AddRec at this
  // point, the accesses may access arbitrary locations (e.g. via an
  // indirection), so classify the dependence as potentially unsafe.
  if (!StrideAPtr || !StrideBPtr) {
    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return MemoryDepChecker::Dependence::IndirectUnsafe;
  }

  int64_t StrideAPtrInt = *StrideAPtr;
  int64_t StrideBPtrInt = *StrideBPtr;
  LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
                    << " Sink induction step: " << StrideBPtrInt << "\n");

  // At least Src or Sink are loop invariant and the other is strided or
  // invariant. We can generate a runtime check to disambiguate the accesses.
  if (!StrideAPtrInt || !StrideBPtrInt)
    return MemoryDepChecker::Dependence::Unknown;

  // Both Src and Sink have a constant stride, check if they are in the same
  // direction.
  if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
    LLVM_DEBUG(
        dbgs() << "Pointer access with strides in different directions\n");
    return MemoryDepChecker::Dependence::Unknown;
  }

  TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
  TypeSize BStoreSz = DL.getTypeStoreSize(BTy);

  // If store sizes are not the same, set TypeByteSize to zero, so we can
  // check it in the caller isDependent.
  uint64_t ASz = DL.getTypeAllocSize(ATy);
  uint64_t BSz = DL.getTypeAllocSize(BTy);
  uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;

  uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
  uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;

  uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);

  std::optional<uint64_t> CommonStride;
  if (StrideAScaled == StrideBScaled)
    CommonStride = StrideAScaled;

  // If the distance turns out not to be safe for vectorization, we can always
  // retry with runtime checks when the element strides match.
  ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;

  // If the distance between the accessing instructions is not computable,
  // give up.
  if (isa<SCEVCouldNotCompute>(Dist)) {
    LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");
    return Dependence::Unknown;
  }

  return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
                                      TypeByteSize, AIsWrite, BIsWrite);
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  // Check if we can prove that Sink only accesses memory after Src's end or
  // vice versa. The helper is used on the exit paths where it can improve
  // the analysis result.
  auto CheckCompletelyBeforeOrAfter = [&]() {
    auto *APtr = A.getPointer();
    auto *BPtr = B.getPointer();
    Type *ATy = getLoadStoreType(InstMap[AIdx]);
    Type *BTy = getLoadStoreType(InstMap[BIdx]);
    const SCEV *Src = PSE.getSCEV(APtr);
    const SCEV *Sink = PSE.getSCEV(BPtr);
    return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);
  };

  // Get the dependence distance, stride, type size and what access writes for
  // the dependence between A and B.
  auto Res =
      getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
  if (std::holds_alternative<Dependence::DepType>(Res)) {
    if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
        CheckCompletelyBeforeOrAfter())
      return Dependence::NoDep;
    return std::get<Dependence::DepType>(Res);
  }

  auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
      std::get<DepDistanceStrideAndSizeInfo>(Res);
  bool HasSameSize = TypeByteSize > 0;

  ScalarEvolution &SE = *PSE.getSE();
  auto &DL = InnermostLoop->getHeader()->getDataLayout();

  // If the distance between the accessing instructions is larger than the
  // maximum number of times the backedge is taken, multiplied by the maximum
  // stride, the accesses are independent: they are far enough apart that
  // they never touch the same location across all loop iterations.
  if (HasSameSize &&
      isSafeDependenceDistance(
          DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
    return Dependence::NoDep;

  // The rest of the analysis prefers a constant distance; otherwise apply
  // loop guards to tighten the range of the distance below.
  const APInt *APDist = nullptr;
  uint64_t ConstDist =
      match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;

  // Attempt to prove strided accesses independent.
  if (APDist) {
    // If the distance between accesses and their strides are known constants,
    // check whether the accesses interlace each other.
    if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
        areStridedAccessesIndependent(ConstDist, *CommonStride,
                                      TypeByteSize)) {
      LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
      return Dependence::NoDep;
    }
  } else {
    if (!LoopGuards)
      LoopGuards.emplace(
          ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
    Dist = SE.applyLoopGuards(Dist, *LoopGuards);
  }

  // Negative or zero distances: the sink is at or before the source in
  // memory, so the dependence is forward (or nonexistent).
  if (SE.isKnownNonPositive(Dist)) {
    if (SE.isKnownNonNegative(Dist)) {
      if (HasSameSize) {
        // Write to the same location with the same size.
        return Dependence::Forward;
      }
      LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
                           "different type sizes\n");
      return Dependence::Unknown;
    }

    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    // Check if the first access writes to a location that is read in a later
    // iteration, where the distance between them is not a multiple of a
    // vector factor and relatively small.
    //
    // NOTE: There is no need to update MaxSafeVectorWidthInBits after the
    // call to couldPreventStoreLoadForward, even if it changed
    // MinDepDistBytes, since a forward dependency will allow vectorization
    // using any width.
    if (IsTrueDataDependence && EnableForwardingConflictDetection) {
      if (!ConstDist) {
        // TODO: FIXME: Can't proceed with a non-constant distance here.
        return Dependence::Unknown;
      }
      if (!HasSameSize ||
          couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
        LLVM_DEBUG(
            dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
        return Dependence::ForwardButPreventsForwarding;
      }
    }

    LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
    return Dependence::Forward;
  }

  int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
  // Below we only handle strictly positive distances.
  if (MinDistance <= 0) {
    ShouldRetryWithRuntimeChecks |= CommonStride.has_value();
    return Dependence::Unknown;
  }

  if (!HasSameSize) {
    if (CheckCompletelyBeforeOrAfter())
      return Dependence::NoDep;
    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
                         "different type sizes\n");
    return Dependence::Unknown;
  }

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor
                               ? VectorizerParams::VectorizationFactor
                               : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave
                               ? VectorizerParams::VectorizationInterleave
                               : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // The accesses are safe to vectorize with factor VF if every iteration's
  // accesses stay at least one full vector step ahead of the dependent ones.
  // With the maximum scaled stride MaxStride, vectorizing MinNumIter
  // iterations requires the dependence distance to cover
  // MaxStride * (MinNumIter - 1) bytes plus one access of TypeByteSize.
  uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
  if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
    if (!ConstDist) {
      // For non-constant distances, we checked the lower bound of the
      // dependence distance and the distance may be larger at runtime (and
      // safe for vectorization). Classify it as Unknown, so we re-try with
      // runtime checks, unless we can prove the accesses cannot overlap.
      return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
                                            : Dependence::Unknown;
    }
    LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
                      << MinDistance << '\n');
    return Dependence::Backward;
  }

  // Unsafe if the minimum distance needed is greater than the smallest
  // dependence distance seen so far.
  if (MinDistanceNeeded > MinDepDistBytes) {
    LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
                      << MinDistanceNeeded << " size in bytes\n");
    return Dependence::Backward;
  }

  MinDepDistBytes =
      std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
      couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  uint64_t MaxVF = MinDepDistBytes / MaxStride;
  LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
                    << " with max VF = " << MaxVF << '\n');

  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
  if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
    // For non-constant distances, we checked the lower bound of the
    // dependence distance and the distance may be larger at runtime (and
    // safe for vectorization). Classify it as Unknown, so we re-try with
    // runtime checks.
    return Dependence::Unknown;
  }

  if (CheckCompletelyBeforeOrAfter())
    return Dependence::NoDep;

  MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
  return Dependence::BackwardVectorizable;
}

bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
                                   const MemAccessInfoList &CheckDeps) {
  MinDepDistBytes = -1;
  SmallPtrSet<MemAccessInfo, 8> Visited;
  for (MemAccessInfo CurAccess : CheckDeps) {
    if (Visited.contains(CurAccess))
      continue;

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
        AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI =
        AccessSets.member_begin(I);
    EquivalenceClasses<MemAccessInfo>::member_iterator AE =
        AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      Visited.insert(*AI);
      bool AIIsWrite = AI->getInt();
      // Check loads only against next equivalent class, but stores also
      // against other stores in the same equivalence class - to the same
      // address.
      EquivalenceClasses<MemAccessInfo>::member_iterator OI =
          (AIIsWrite ? AI : std::next(AI));
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        auto &Acc = Accesses[*AI];
        for (std::vector<unsigned>::iterator I1 = Acc.begin(), I1E = Acc.end();
             I1 != I1E; ++I1)
          // Scan all accesses of another equivalence class, but only the next
          // accesses of the same equivalent class.
          for (std::vector<unsigned>::iterator
                   I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
                   I2E = (OI == AI ? I1E : Accesses[*OI].end());
               I2 != I2E; ++I2) {
            auto A = std::make_pair(&*AI, *I1);
            auto B = std::make_pair(&*OI, *I2);

            assert(*I1 != *I2);
            if (*I1 > *I2)
              std::swap(A, B);

            Dependence::DepType Type =
                isDependent(*A.first, A.second, *B.first, B.second);
            mergeInStatus(Dependence::isSafeForVectorization(Type));

            // Gather dependences unless we accumulated MaxDependences
            // dependences.  In that case return as soon as we find the first
            // unsafe dependence.  This puts a limit on this quadratic
            // algorithm.
            if (RecordDependences) {
              if (Type != Dependence::NoDep)
                Dependences.emplace_back(A.second, B.second, Type);

              if (Dependences.size() >= MaxDependences) {
                RecordDependences = false;
                Dependences.clear();
                LLVM_DEBUG(dbgs()
                           << "Too many dependences, stopped recording\n");
              }
            }
            if (!RecordDependences && !isSafeForVectorization())
              return false;
          }
        ++OI;
      }
      ++AI;
    }
  }

  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
  return isSafeForVectorization();
}

SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool IsWrite) const {
  MemAccessInfo Access(Ptr, IsWrite);
  auto I = Accesses.find(Access);
  SmallVector<Instruction *, 4> Insts;
  if (I != Accesses.end()) {
    transform(I->second, std::back_inserter(Insts),
              [&](unsigned Idx) { return this->InstMap[Idx]; });
  }

  return Insts;
}

const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep",
    "Unknown",
    "IndirectUnsafe",
    "Forward",
    "ForwardButPreventsForwarding",
    "Backward",
    "BackwardVectorizable",
    "BackwardVectorizableButPreventsForwarding"};

void MemoryDepChecker::Dependence::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<Instruction *> &Instrs) const {
  OS.indent(Depth) << DepName[Type] << ":\n";
  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}

bool LoopAccessInfo::canAnalyzeLoop() {
  // We need to have a loop header.
  LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
                    << TheLoop->getHeader()->getParent()->getName() << "' from "
                    << TheLoop->getLocStr() << "\n");

  // We can only analyze innermost loops.
  if (!TheLoop->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
    recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
    return false;
  }

  // We must have a single backedge.
  if (TheLoop->getNumBackEdges() != 1) {
    LLVM_DEBUG(
        dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    recordAnalysis("CFGNotUnderstood")
        << "loop control flow is not understood by analyzer";
    return false;
  }

  // ScalarEvolution needs to be able to find the symbolic max backedge taken
  // count, which is needed for proving dependences and performing memory
  // versioning.
  if (isa<SCEVCouldNotCompute>(PSE->getSymbolicMaxBackedgeTakenCount())) {
    recordAnalysis("CantComputeNumberOfIterations")
        << "could not determine number of loop iterations";
    LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
                    << TheLoop->getHeader()->getName() << "\n");
  return true;
}

bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
                                 const TargetLibraryInfo *TLI,
                                 DominatorTree *DT) {
  // Holds the Load and Store instructions.
  SmallVector<LoadInst *, 16> Loads;
  SmallVector<StoreInst *, 16> Stores;
  SmallPtrSet<MDNode *, 8> LoopAliasScopes;

  // Holds all the different accesses in the loop.
  unsigned NumReads = 0;
  unsigned NumReadWrites = 0;

  bool HasComplexMemInst = false;

  // A runtime check is only legal to insert if there are no convergent calls.
  HasConvergentOp = false;

  PtrRtChecking->Pointers.clear();
  PtrRtChecking->Need = false;

  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

  const bool EnableMemAccessVersioningOfLoop =
      EnableMemAccessVersioning &&
      !TheLoop->getHeader()->getParent()->hasOptSize();

  // Traverse blocks in fixed RPOT order, regardless of their storage in the
  // loop info, as it otherwise would be non-deterministic.
  LoopBlocksRPO RPOT(TheLoop);
  RPOT.perform(LI);
  for (BasicBlock *BB : RPOT) {
    // Scan the BB and collect legal loads and stores. Also detect any
    // convergent instructions.
    for (Instruction &I : *BB) {
      if (auto *Call = dyn_cast<CallBase>(&I)) {
        if (Call->isConvergent())
          HasConvergentOp = true;
      }

      // With both a non-vectorizable memory instruction and a convergent
      // operation, found in this loop, no reason to continue the search.
      if (HasComplexMemInst && HasConvergentOp)
        return false;

      // Avoid hitting recordAnalysis multiple times.
      if (HasComplexMemInst)
        continue;

      // Record alias scopes defined inside the loop.
      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        for (Metadata *Op : Decl->getScopeList()->operands())
          LoopAliasScopes.insert(cast<MDNode>(Op));

      // Many math library functions read the rounding mode. We will only
      // vectorize a loop if it contains known function calls that don't set
      // the flag. Therefore, it is safe to ignore this read from memory.
      auto *Call = dyn_cast<CallInst>(&I);
      if (Call && getVectorIntrinsicIDForCall(Call, TLI))
        continue;

      // If this is a load, save it. If this instruction can read from memory
      // but is not a load, we only allow it if it's a call to a function with
      // a vector mapping and no pointer arguments.
      if (I.mayReadFromMemory()) {
        auto hasPointerArgs = [](CallBase *CB) {
          return any_of(CB->args(), [](Value const *Arg) {
            return Arg->getType()->isPointerTy();
          });
        };

        // If the function has an explicit vectorized counterpart, and does
        // not take output/input pointers, we can safely assume that it can
        // be vectorized.
        if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
            !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
          continue;

        auto *Ld = dyn_cast<LoadInst>(&I);
        if (!Ld) {
          recordAnalysis("CantVectorizeInstruction", Ld)
              << "instruction cannot be vectorized";
          HasComplexMemInst = true;
          continue;
        }
        if (!Ld->isSimple() && !IsAnnotatedParallel) {
          recordAnalysis("NonSimpleLoad", Ld)
              << "read with atomic ordering or volatile read";
          LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
          HasComplexMemInst = true;
          continue;
        }
        NumLoads++;
        Loads.push_back(Ld);
        DepChecker->addAccess(Ld);
        if (EnableMemAccessVersioningOfLoop)
          collectStridedAccess(Ld);
        continue;
      }

      // Save 'store' instructions. Abort if other instructions write to
      // memory.
      if (I.mayWriteToMemory()) {
        auto *St = dyn_cast<StoreInst>(&I);
        if (!St) {
          recordAnalysis("CantVectorizeInstruction", St)
              << "instruction cannot be vectorized";
          HasComplexMemInst = true;
          continue;
        }
        if (!St->isSimple() && !IsAnnotatedParallel) {
          recordAnalysis("NonSimpleStore", St)
              << "write with atomic ordering or volatile write";
          LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
          HasComplexMemInst = true;
          continue;
        }
        NumStores++;
        Stores.push_back(St);
        DepChecker->addAccess(St);
        if (EnableMemAccessVersioningOfLoop)
          collectStridedAccess(St);
      }
    } // Next instr.
  } // Next block.

  if (HasComplexMemInst)
    return false;

  // Now we have two lists that hold the loads and the stores.
  // Next, we find the pointers that they use.

  // Check if we see any stores. If there are no stores, then we don't
  // care if the pointers are *restrict*.
  if (!Stores.size()) {
    LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
    return true;
  }

  MemoryDepChecker::DepCandidates DepCands;
  AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,
                          LoopAliasScopes);

  // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
  // multiple times on the same object. If the ptr is accessed twice, once
  // for read and once for write, it will only appear once (on the write
  // list). This is okay, since we are going to check for conflicts between
  // writes and between reads and writes, but not between reads and reads.
  SmallSet<std::pair<Value *, Type *>, 16> Seen;

  // Record uniform store addresses to identify if we have multiple stores
  // to the same address.
  SmallPtrSet<Value *, 16> UniformStores;

  for (StoreInst *ST : Stores) {
    Value *Ptr = ST->getPointerOperand();

    if (isInvariant(Ptr)) {
      // Record store instructions to loop invariant addresses.
      StoresToInvariantAddresses.push_back(ST);
      HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
          !UniformStores.insert(Ptr).second;
    }

    // If we did *not* see this pointer before, insert it to the read-write
    // list. At this phase it is only a 'write' list.
    Type *AccessTy = getLoadStoreType(ST);
    if (Seen.insert({Ptr, AccessTy}).second) {
      ++NumReadWrites;

      MemoryLocation Loc = MemoryLocation::get(ST);
      // The TBAA metadata could have a control dependency on the predication
      // condition, so we cannot rely on it when determining whether or not we
      // need runtime pointer checks.
      if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
        Loc.AATags.TBAA = nullptr;

      visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
                    [&Accesses, AccessTy, Loc](Value *Ptr) {
                      MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                      Accesses.addStore(NewLoc, AccessTy);
                    });
    }
  }

  if (IsAnnotatedParallel) {
    LLVM_DEBUG(
        dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
               << "checks.\n");
    return true;
  }

  for (LoadInst *LD : Loads) {
    Value *Ptr = LD->getPointerOperand();
    // If we did *not* see this pointer before, insert it to the
    // read list. If we *did* see it before, then it is already in
    // the read-write list. This allows us to vectorize expressions
    // such as A[i] += x;  Because the address of A[i] is a read-write
    // pointer. This only works if the index of A[i] is consecutive.
    // If the address of i is unknown (for example A[B[i]]) then we have
    // read a few words, modify, and write a few words, and some of the
    // words may be written to the same address.
    bool IsReadOnlyPtr = false;
    Type *AccessTy = getLoadStoreType(LD);
    if (Seen.insert({Ptr, AccessTy}).second ||
        !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides,
                      false, true)) {
      ++NumReads;
      IsReadOnlyPtr = true;
    }

    // See if there is an unsafe dependency between a load to a uniform
    // address and store to the same uniform address.
    if (UniformStores.contains(Ptr)) {
      LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
                           "load and uniform store to the same address!\n");
      HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
    }

    MemoryLocation Loc = MemoryLocation::get(LD);
    // The TBAA metadata could have a control dependency on the predication
    // condition, so we cannot rely on it when determining whether or not we
    // need runtime pointer checks.
    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
      Loc.AATags.TBAA = nullptr;

    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
                  [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
                    MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                    Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
                  });
  }

  // If we write (or read-write) to a single destination and there are no
  // other reads in this loop then is it safe to vectorize.
  if (NumReadWrites == 1 && NumReads == 0) {
    LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
    return true;
  }

  // Build dependence sets and check whether we need a runtime pointer bounds
  // check.
  Accesses.buildDependenceSets();

  // Find pointers with computable bounds. We are going to use this
  // information to place a runtime bound check.
  Value *UncomputablePtr = nullptr;
  HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
      *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);
  if (!HasCompletePtrRtChecking) {
    const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
    recordAnalysis("CantIdentifyArrayBounds", I)
        << "cannot identify array bounds";
    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
                      << "the array bounds.\n");
    return false;
  }

  LLVM_DEBUG(
      dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");

  bool DepsAreSafe = true;
  if (Accesses.isDependencyCheckNeeded()) {
    LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
    DepsAreSafe =
        DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());

    if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
      LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

      // Clear the dependency checks. We assume they are not needed.
      Accesses.resetDepChecks(*DepChecker);

      PtrRtChecking->reset();
      PtrRtChecking->Need = true;

      UncomputablePtr = nullptr;
      HasCompletePtrRtChecking =
          Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
                                   UncomputablePtr, AllowPartial);

      // Check that we found the bounds for the pointer.
      if (!HasCompletePtrRtChecking) {
        auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
        recordAnalysis("CantCheckMemDepsAtRunTime", I)
            << "cannot check memory dependencies at runtime";
        LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
        return false;
      }
      DepsAreSafe = true;
    }
  }

  if (HasConvergentOp) {
    recordAnalysis("CantInsertRuntimeCheckWithConvergent")
        << "cannot add control dependency to convergent operation";
    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
                         "would be needed with a convergent operation\n");
    return false;
  }

  if (DepsAreSafe) {
    LLVM_DEBUG(
        dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
               << (PtrRtChecking->Need ? "" : " don't")
               << " need runtime memory checks.\n");
    return true;
  }

  emitUnsafeDependenceRemark();
  return false;
}

void LoopAccessInfo::emitUnsafeDependenceRemark() {
  const auto *Deps = getDepChecker().getDependences();
  if (!Deps)
    return;
  const auto *Found =
      llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
        return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
               MemoryDepChecker::VectorizationSafetyStatus::Safe;
      });
  if (Found == Deps->end())
    return;
  MemoryDepChecker::Dependence Dep = *Found;

  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");

  // Emit remark for first unsafe dependence.
  bool HasForcedDistribution = false;
  std::optional<const MDOperand *> Value =
      findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
  if (Value) {
    const MDOperand *Op = *Value;
    assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
    HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
  }

  const std::string Info =
      HasForcedDistribution
          ? "unsafe dependent memory operations in loop."
          : "unsafe dependent memory operations in loop. Use "
            "#pragma clang loop distribute(enable) to allow loop distribution "
            "to attempt to isolate the offending operations into a separate "
            "loop";
  OptimizationRemarkAnalysis &R =
      recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;

  switch (Dep.Type) {
  case MemoryDepChecker::Dependence::NoDep:
  case MemoryDepChecker::Dependence::Forward:
  case MemoryDepChecker::Dependence::BackwardVectorizable:
    llvm_unreachable("Unexpected dependence");
  case MemoryDepChecker::Dependence::Backward:
    R << "\nBackward loop carried data dependence.";
    break;
  case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
    R << "\nForward loop carried data dependence that prevents "
         "store-to-load forwarding.";
    break;
  case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
    R << "\nBackward loop carried data dependence that prevents "
         "store-to-load forwarding.";
    break;
  case MemoryDepChecker::Dependence::IndirectUnsafe:
    R << "\nUnsafe indirect dependence.";
    break;
  case MemoryDepChecker::Dependence::Unknown:
    R << "\nUnknown data dependence.";
    break;
  }

  if (Instruction *I = Dep.getSource(getDepChecker())) {
    DebugLoc SourceLoc = I->getDebugLoc();
    if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
      SourceLoc = DD->getDebugLoc();
    if (SourceLoc)
      R << " Memory location is the same as accessed at "
        << ore::NV("Location", SourceLoc);
  }
}

bool LoopAccessInfo::blockNeedsPredication(const BasicBlock *BB,
                                           const Loop *TheLoop,
                                           const DominatorTree *DT) {
  assert(TheLoop->contains(BB) && "Unknown block used");

  // Blocks that do not dominate the latch need predication.
  const BasicBlock *Latch = TheLoop->getLoopLatch();
  return !DT->dominates(BB, Latch);
}
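
// Illustrative CFG (assumption): in the loop below, %if.then does not
// dominate the latch, so it needs predication when the body is vectorized;
// the header and latch blocks do not.
//
//   header:  br i1 %c, label %if.then, label %latch
//   if.then: store ...              ; conditionally executed
//            br label %latch
//   latch:   br i1 %ec, label %header, label %exit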

OptimizationRemarkAnalysis &
LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
  assert(!Report && "Multiple reports generated");

  const Value *CodeRegion = TheLoop->getHeader();
  DebugLoc DL = TheLoop->getStartLoc();

  if (I) {
    CodeRegion = I->getParent();
    // If there is no debug location attached to the instruction, revert back
    // to using the loop's.
    if (I->getDebugLoc())
      DL = I->getDebugLoc();
  }

  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
                                                        DL, CodeRegion);
  return *Report;
}

bool LoopAccessInfo::isInvariant(Value *V) const {
  auto *SE = PSE->getSE();
  if (TheLoop->isLoopInvariant(V))
    return true;
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);
  return SE->isLoopInvariant(S, TheLoop);
}

/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
/// Otherwise, return \p Ptr.
static Value *getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE,
                                       Loop *Lp) {
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  Value *V = Ptr;
  for (const Use &U : GEP->operands()) {
    if (!SE->isLoopInvariant(SE->getSCEV(U), Lp)) {
      if (V == Ptr)
        V = U;
      else
        // There must be exactly one loop-variant operand.
        return Ptr;
    }
  }
  return V;
}

/// Get the stride of a pointer access in a loop. Looks for symbolic strides
/// "a[i*stride]". Returns the symbolic stride, or nullptr otherwise.
static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
                                        Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy)
    return nullptr;

  // Try to remove a gep instruction to make the pointer (actually to the
  // memory) simpler.
  Value *OrigPtr = Ptr;

  Ptr = getLoopVariantGEPOperand(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);

  if (Ptr != OrigPtr)
    // Strip off casts.
    while (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
      V = C->getOperand();

  auto *S = dyn_cast<SCEVAddRecExpr>(V);
  if (!S)
    return nullptr;

  // If the pointer is invariant then there is no stride and it makes no
  // sense to add it here.
  if (Lp != S->getLoop())
    return nullptr;

  V = S->getStepRecurrence(*SE);
  if (!V)
    return nullptr;

  // Look for the loop invariant symbolic value.
  if (isa<SCEVUnknown>(V))
    return V;

  if (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
    if (isa<SCEVUnknown>(C->getOperand()))
      return V;

  return nullptr;
}

void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
  Value *Ptr = getLoadStorePointerOperand(MemAccess);
  if (!Ptr)
    return;

  // Note: getStrideFromPointer is a *profitability* heuristic. We could
  // broaden the scope of values returned here - to anything which happens to
  // be loop invariant and contributes to the computation of an interesting
  // IV - but we chose not to as we don't have a cost model here, and
  // broadening the scope exposes far too many unprofitable cases.
  const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
  if (!StrideExpr)
    return;

  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
                       "versioning:");
  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");

  if (!SpeculateUnitStride) {
    LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
    return;
  }

  // Avoid adding the "Stride == 1" predicate when we know that
  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
  // or zero iteration loop, as Trip-Count <= Stride == 1.
  //
  // TODO: We are currently not making a very informed decision on when it is
  // beneficial to apply stride versioning. It might make more sense that the
  // users of this analysis (such as the vectorizer) will trigger it, based on
  // their specific cost considerations; For example, in cases where stride
  // versioning does not help resolving memory accesses/dependences, the
  // vectorizer should evaluate the cost of the runtime test, and the benefit
  // of various possible stride specializations, considering the alternatives
  // of using gather/scatters (if available).

  const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();

  // Match the types so we can compare the stride and the MaxBTC.
  // The Stride can be positive/negative, so we sign extend Stride;
  // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
  const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
  const SCEV *CastedStride = StrideExpr;
  const SCEV *CastedBECount = MaxBTC;
  ScalarEvolution *SE = PSE->getSE();
  if (BETypeSizeBits >= StrideTypeSizeBits)
    CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
  else
    CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
  const SCEV *StrideMinusBETaken =
      SE->getMinusSCEV(CastedStride, CastedBECount);
  // Since TripCount == BackEdgeTakenCount + 1, checking
  // "Stride >= TripCount" is equivalent to checking
  // "Stride - MaxBTC > 0".
  if (SE->isKnownPositive(StrideMinusBETaken)) {
    LLVM_DEBUG(
        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
                  "Stride==1 predicate will imply that the loop executes "
                  "at most once.\n");
    return;
  }
  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");

  // Strip back off the integer cast, and check that our result is a
  // SCEVUnknown as we expect.
  const SCEV *StrideBase = StrideExpr;
  if (auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
    StrideBase = C->getOperand();
  SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
}
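
// Worked example (assumption): if the symbolic stride is %n and the loop's
// symbolic max backedge-taken count is %n - 1 (trip count %n), then
// Stride - MaxBTC = %n - (%n - 1) = 1, which is known positive, so the
// "Stride == 1" predicate would only cover a loop that executes at most once
// and the versioning candidate is skipped.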

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetTransformInfo *TTI,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI,
                               AssumptionCache *AC, bool AllowPartial)
    : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
      PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
  unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
  if (TTI && !TTI->enableScalableVectorization())
    // Scale the vector width by 2 as rough estimate to also consider
    // interleaving.
    MaxTargetVectorWidthInBits =
        TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) *
        2;

  DepChecker = std::make_unique<MemoryDepChecker>(
      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
  PtrRtChecking =
      std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
  if (canAnalyzeLoop())
    CanVecMem = analyzeLoop(AA, LI, TLI, DT);
}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    OS.indent(Depth) << "Memory dependences are safe";
    const MemoryDepChecker &DC = getDepChecker();
    if (!DC.isSafeForAnyVectorWidth())
      OS << " with a maximum safe vector width of "
         << DC.getMaxSafeVectorWidthInBits() << " bits";
    if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
      uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
      OS << ", with a maximum safe store-load forward width of " << SLDist
         << " bits";
    }
    if (PtrRtChecking->Need)
      OS << " with run-time checks";
    OS << "\n";
  }

  if (HasConvergentOp)
    OS.indent(Depth) << "Has convergent operation in loop\n";

  if (Report)
    OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";

  if (auto *Dependences = DepChecker->getDependences()) {
    OS.indent(Depth) << "Dependences:\n";
    for (const auto &Dep : *Dependences) {
      Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
      OS << "\n";
    }
  } else
    OS.indent(Depth) << "Too many dependences, not recorded\n";

  // List the pair of accesses need run-time checks to prove independence.
  PtrRtChecking->print(OS, Depth);
  if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
    OS.indent(Depth) << "Generated run-time checks are incomplete\n";
  OS << "\n";

  OS.indent(Depth)
      << "Non vectorizable stores to invariant address were "
      << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
                  HasLoadStoreDependenceInvolvingLoopInvariantAddress
              ? ""
              : "not ")
      << "found in loop.\n";

  OS.indent(Depth) << "SCEV assumptions:\n";
  PSE->getPredicate().print(OS, Depth);

  OS << "\n";

  OS.indent(Depth) << "Expressions re-written:\n";
  PSE->print(OS, Depth);
}

const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
                                                     bool AllowPartial) {
  const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);

  // We need to create the LoopAccessInfo if either we don't already have one,
  // or if it was created with a different value of AllowPartial.
  if (Inserted || It->second->hasAllowPartial() != AllowPartial)
    It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
                                                  &LI, AC, AllowPartial);

  return *It->second;
}

void LoopAccessInfoManager::clear() {
  // Collect LoopAccessInfo entries that may keep references to IR outside the
  // analyzed loop or SCEVs that may have been modified or invalidated. At the
  // moment, that is loops requiring memory or SCEV runtime checks, as those
  // cache SCEVs, e.g. for pointer expressions.
  SmallVector<Loop *> ToRemove;
  for (const auto &[L, LAI] : LoopAccessInfoMap) {
    if (LAI->getRuntimePointerChecking()->getChecks().empty() &&
        LAI->getPSE().getPredicate().isAlwaysTrue())
      continue;
    ToRemove.push_back(L);
  }
  for (Loop *L : ToRemove)
    LoopAccessInfoMap.erase(L);
}

bool LoopAccessInfoManager::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether our analysis is preserved.
  auto PAC = PA.getChecker<LoopAccessAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    // If not, give up now.
    return true;

  // Check whether the analyses we depend on became invalid for any reason.
  // Skip checking TargetLibraryAnalysis as it is immutable and can't become
  // invalid.
  return Inv.invalidate<AAManager>(F, PA) ||
         Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA);
}
3193
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &ScevList, unsigned Depth)
Definition LoopAccessAnalysis.cpp:1112
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
static const SCEV * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition LoopAccessAnalysis.cpp:2949
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize, ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Return true, if evaluating AR at MaxBTC cannot wrap, because AR at MaxBTC is guaranteed inbounds of t...
Definition LoopAccessAnalysis.cpp:213
static std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)
Try to compute a constant stride for AR.
Definition LoopAccessAnalysis.cpp:969
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static DenseMap< const RuntimeCheckingPtrGroup *, unsigned > getPtrToIdxMap(ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups)
Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
Definition LoopAccessAnalysis.cpp:753
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
Definition LoopAccessAnalysis.cpp:1073
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, Value *Ptr, Type *AccessTy, const Loop *L, bool Assume, const DominatorTree &DT, std::optional< int64_t > Stride=std::nullopt)
Check whether AR is a non-wrapping AddRec.
Definition LoopAccessAnalysis.cpp:1019
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, uint64_t MaxStride)
Given a dependence-distance Dist between two memory accesses, that have strides in the same direction...
Definition LoopAccessAnalysis.cpp:1925
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
Definition LoopAccessAnalysis.cpp:1980
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Definition LoopAccessAnalysis.cpp:557
static const SCEV * mulSCEVOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A * B, if it is guaranteed not to unsigned wrap.
Definition LoopAccessAnalysis.cpp:204
static Value * getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If Ptr is a GEP, which has a loop-variant operand, return that operand.
Definition LoopAccessAnalysis.cpp:2928
static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
static cl::opt< bool > SpeculateUnitStride("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
static const SCEV * addSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A + B if it is guaranteed not to unsigned wrap.
Definition LoopAccessAnalysis.cpp:195
This header provides classes for managing per-loop analyses.
This file provides utility analysis objects describing memory locations.
FunctionAnalysisManager FAM
This file defines the PointerIntPair class.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
A manager for alias analyses.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
APInt abs() const
Get the absolute value.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
int64_t getSExtValue() const
Get sign extended value.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isConvergent() const
Determine if the invoke is convergent.
@ ICMP_UGE
unsigned greater or equal
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
A parsed version of the target data layout string, and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< member_iterator > members(const ECValue &ECV) const
bool contains(const ElemTy &V) const
Returns true if V is contained in an equivalence class.
const ECValue & insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
member_iterator member_end() const
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
member_iterator findLeader(const ElemTy &V) const
findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
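LAA uses EquivalenceClasses for DepCandidates (sets of potentially dependent memory accesses; see MemoryDepChecker below). A minimal sketch of the API entries listed here:

#include "llvm/ADT/EquivalenceClasses.h"
#include <cassert>

void equivalenceClassesDemo() {
  llvm::EquivalenceClasses<int> EC;
  EC.insert(1);
  EC.insert(2);
  EC.insert(3);
  EC.unionSets(1, 2); // classes are now {1, 2} and {3}
  assert(EC.getLeaderValue(1) == EC.getLeaderValue(2));
  assert(EC.contains(3));
  // Walk every member of the class containing 1, starting at its leader.
  for (auto MI = EC.findLeader(1), ME = EC.member_end(); MI != ME; ++MI)
    (void)*MI;
}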
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
PointerType * getType() const
Global values are always pointers.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
Value * getPointerOperand()
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ABI Result run(Function &F, FunctionAnalysisManager &AM)
Definition LoopAccessAnalysis.cpp:3182
LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Definition LoopAccessAnalysis.cpp:3164
LLVM_ABI void clear()
Definition LoopAccessAnalysis.cpp:3151
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Definition LoopAccessAnalysis.cpp:3139
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
The Memory Dependence Checker, which can determine the loop-independent and loop-carried dependences b...
LLVM_ABI bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
Definition LoopAccessAnalysis.cpp:2916
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
Definition LoopAccessAnalysis.cpp:3084
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition LoopAccessAnalysis.cpp:2886
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial=false)
Definition LoopAccessAnalysis.cpp:3062
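A hypothetical new-pass-manager client of the entries above: fetch the LoopAccessInfoManager for a function, then query per-loop results. The function body is a sketch for illustration, not LAA code:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

void inspectLoops(Function &F, FunctionAnalysisManager &AM) {
  LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
  for (Loop *L : LI) {
    const LoopAccessInfo &LAI = LAIs.getInfo(*L);
    // Memory-safe for vectorization if there are no unsafe dependences;
    // Need reports whether runtime pointer checks must be emitted.
    bool DepsOK = LAI.getDepChecker().isSafeForVectorization();
    bool NeedsRTChecks = LAI.getRuntimePointerChecking()->Need;
    (void)DepsOK;
    (void)NeedsRTChecks;
  }
}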
Analysis pass that exposes the LoopInfo for a function.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Represents a single loop in the control flow graph.
std::string getLocStr() const
Return a string containing the debug location of the loop (file name + line number if present,...
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
ArrayRef< MDOperand > operands() const
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool isSafeForAnyStoreLoadForwardDistances() const
Return true if there are no store-load forwarding dependencies.
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, const MemAccessInfoList &CheckDeps)
Check whether the dependencies between the accesses are safe, and record the dependence information ...
Definition LoopAccessAnalysis.cpp:2364
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
bool shouldRetryWithRuntimeChecks() const
In some cases when the dependency check fails, we can still vectorize the loop with a dynamic array ac...
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
SmallVector< MemAccessInfo, 8 > MemAccessInfoList
LLVM_ABI SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
Definition LoopAccessAnalysis.cpp:2438
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
@ PossiblySafeWithRtChecks
LLVM_ABI void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
Definition LoopAccessAnalysis.cpp:1780
PointerIntPair< Value *, 1, bool > MemAccessInfo
uint64_t getStoreLoadForwardSafeDistanceInBits() const
Return a safe power-of-2 number of elements, which do not prevent store-load forwarding,...
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
PointerIntPair - This class implements a pair of a pointer and small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
LLVM_ABI void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
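A sketch of the PredicatedScalarEvolution workflow these entries describe: get a pointer's SCEV under the current predicates and, if needed, let getAsAddRec add predicates to shape it into an affine AddRec. The parameters are assumed context:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

const SCEV *getAffineStep(PredicatedScalarEvolution &PSE, Value *Ptr) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
  if (!AR)
    AR = PSE.getAsAddRec(Ptr); // may add SCEV predicates
  if (!AR || !AR->isAffine())
    return nullptr;
  return AR->getStepRecurrence(*PSE.getSE()); // per-iteration step
}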
A set of analyses that are preserved following a run of a transformation pass.
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition LoopAccessAnalysis.cpp:760
LLVM_ABI bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition LoopAccessAnalysis.cpp:546
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
Definition LoopAccessAnalysis.cpp:779
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
Definition LoopAccessAnalysis.cpp:539
friend struct RuntimeCheckingPtrGroup
static LLVM_ABI bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition LoopAccessAnalysis.cpp:728
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
LLVM_ABI void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
Definition LoopAccessAnalysis.cpp:400
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Determine whether the operation BinOp between LHS and RHS provably does not have signed/unsigned overflow (Signed).
LLVM_ABI const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
APInt getSignedRangeMin(const SCEV *S)
Determine the min of the signed range for a particular SCEV.
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)
Return an expression for a TypeSize.
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
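A small sketch combining several of the ScalarEvolution members above, checking a distance under loop guards; the inputs are assumed:

#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

bool distanceKnownNonNegative(ScalarEvolution &SE, const Loop *L,
                              const SCEV *Start, const SCEV *End) {
  const SCEV *Dist = SE.getMinusSCEV(End, Start); // End - Start
  // Dominating conditions such as 'if (N > 0)' can tighten Dist's range.
  Dist = SE.applyLoopGuards(Dist, L);
  return SE.isKnownNonNegative(Dist);
}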
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool canBeFreed() const
Return true if the memory object referred to by V can be freed in the scope for which the SSA value d...
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
is_undef_or_poison m_scev_UndefOrPoison()
Match an SCEVUnknown wrapping undef or poison.
class_match< const SCEVConstant > m_SCEVConstant()
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
specificscev_ty m_scev_Specific(const SCEV *S)
Match if we have a specific specified SCEV.
class_match< const SCEV > m_SCEV()
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, bool > hasa(Y &&MD)
Check whether Metadata has a Value.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI bool willNotFreeBetween(const Instruction *Assume, const Instruction *CtxI)
Returns true if no instruction between Assume and CtxI may free memory and the function is marked as...
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI RetainedKnowledge getKnowledgeForValue(const Value *V, ArrayRef< Attribute::AttrKind > AttrKinds, AssumptionCache &AC, function_ref< bool(RetainedKnowledge, Instruction *, const CallBase::BundleOpInfo *)> Filter=[](auto...) { return true;})
Return a valid Knowledge associated to the Value V if its Attribute kind is in AttrKinds and it match...
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto dyn_cast_or_null(const Y &Val)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< int64_t > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
Definition LoopAccessAnalysis.cpp:1656
LLVM_ABI bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
Definition LoopAccessAnalysis.cpp:1725
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition LoopAccessAnalysis.cpp:155
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition LoopAccessAnalysis.cpp:1766
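A sketch of the free functions above: decide whether two loads touch adjacent elements. The setup objects are assumed:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Instructions.h"
#include <optional>

using namespace llvm;

bool loadsAreAdjacent(LoadInst *A, LoadInst *B, const DataLayout &DL,
                      ScalarEvolution &SE) {
  // Distance from A's pointer to B's pointer, in units of the element type.
  std::optional<int64_t> Diff =
      getPointersDiff(A->getType(), A->getPointerOperand(), B->getType(),
                      B->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
  return Diff && *Diff == 1; // effectively what isConsecutiveAccess(A, B, DL, SE) checks
}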
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
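A sketch using the helper above; LAA walks underlying objects when building its dependence sets. The wrapper is hypothetical:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ValueTracking.h"

using namespace llvm;

bool hasSingleUnderlyingObject(const Value *Ptr, const LoopInfo *LI) {
  SmallVector<const Value *, 4> Objects;
  getUnderlyingObjects(Ptr, Objects, LI); // looks through phis and selects
  return Objects.size() == 1;
}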
LLVM_ABI std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, Type * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Calculate the Start and End points of a memory access using the exact backedge-taken count BTC if computable, or...
Definition LoopAccessAnalysis.cpp:318
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride, return it in units of the access type size.
Definition LoopAccessAnalysis.cpp:1622
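A sketch of the getPtrStride query above; the result is in units of AccessTy's size, so +1 means consecutive forward accesses. The context objects are assumed:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Dominators.h"
#include <optional>

using namespace llvm;

bool isUnitStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
                  const Loop *L, const DominatorTree &DT) {
  std::optional<int64_t> Stride =
      getPtrStride(PSE, AccessTy, Ptr, L, DT); // std::nullopt if not constant
  return Stride && *Stride == 1;
}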
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
IR Values for the lower and upper bounds of a pointer evolution.
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * TBAA
The tag for type-based alias analysis.
MDNode * NoAlias
The tag specifying the noalias scope.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
LLVM_ABI bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Definition LoopAccessAnalysis.cpp:1834
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
LLVM_ABI bool isForward() const
Lexically forward dependence.
Definition LoopAccessAnalysis.cpp:1838
LLVM_ABI bool isBackward() const
Lexically backward dependence.
Definition LoopAccessAnalysis.cpp:1817
LLVM_ABI void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Definition LoopAccessAnalysis.cpp:2460
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
@ BackwardVectorizableButPreventsForwarding
@ ForwardButPreventsForwarding
static LLVM_ABI const char * DepName[]
String version of the types.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Definition LoopAccessAnalysis.cpp:1799
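A sketch iterating recorded dependences with the Dependence accessors above; dependences are only recorded while the checker's collection limit is not exceeded, and LAI is assumed to be available:

#include "llvm/Analysis/LoopAccessAnalysis.h"

using namespace llvm;

void countBackwardDeps(const LoopAccessInfo &LAI, unsigned &NumBackward) {
  const MemoryDepChecker &DepChecker = LAI.getDepChecker();
  NumBackward = 0;
  if (const auto *Deps = DepChecker.getDependences())
    for (const MemoryDepChecker::Dependence &Dep : *Deps)
      if (Dep.isBackward()) {
        // getSource/getDestination map InstMap indices back to instructions.
        (void)Dep.getSource(DepChecker);
        (void)Dep.getDestination(DepChecker);
        ++NumBackward;
      }
}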
Represent one information held inside an operand bundle of an llvm.assume.
unsigned AddressSpace
Address space of the involved pointers.
LLVM_ABI bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
Definition LoopAccessAnalysis.cpp:565
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
LLVM_ABI RuntimeCheckingPtrGroup(unsigned Index, const RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
Definition LoopAccessAnalysis.cpp:183
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI unsigned VectorizationFactor
VF as overridden by the user.
static LLVM_ABI unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime, do not make more than this number of compari...
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition LoopAccessAnalysis.cpp:151
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static LLVM_ABI bool HoistRuntimeChecks
Function object to check whether the first component of a container supported by std::get (like std::...