LLVM: lib/Analysis/LoopAccessAnalysis.cpp Source File
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <variant>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true> VectorizationFactor(
    "force-vector-width", cl::Hidden,
    cl::desc("Sets the SIMD width. Zero is autoselect."),
    cl::location(VectorizerParams::VectorizationFactor));

static cl::opt<unsigned, true> VectorizationInterleave(
    "force-vector-interleave", cl::Hidden,
    cl::desc("Sets the vectorization interleave count. "
             "Zero is autoselect."),
    cl::location(VectorizerParams::VectorizationInterleave));

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));

/// The maximum iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses.
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

static cl::opt<unsigned> MaxForkedSCEVDepth(
    "max-forked-scev-depth", cl::Hidden,
    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
    cl::init(5));

static cl::opt<bool> SpeculateUnitStride(
    "laa-speculate-unit-stride", cl::Hidden,
    cl::desc("Speculate that non-constant strides are unit in LAA"),
    cl::init(true));

static cl::opt<bool, true> HoistRuntimeChecks(
    "hoist-runtime-checks", cl::Hidden,
    cl::desc(
        "Hoist inner loop runtime memory checks to outer loop if possible"),
    cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}
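
// Editor's illustrative sketch (not part of the original source): the
// cl::opt declarations above use the two-argument form cl::opt<T, true> plus
// cl::location so that the parsed value is written through to an external
// variable (here, a VectorizerParams member). A minimal standalone model of
// that mechanism; the flag name "example-width" is hypothetical.

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static unsigned Backing; // stands in for a VectorizerParams member

static cl::opt<unsigned, true> // 'true' selects external storage
    ExampleWidth("example-width", cl::Hidden,
                 cl::desc("Illustrative width flag (hypothetical)"),
                 cl::location(Backing), cl::init(0));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return Backing; // holds the value of -example-width=N, or 0 by default
}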

const SCEV *
llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                const DenseMap<Value *, const SCEV *> &PtrToStride,
                                Value *Ptr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);
  if (!StrideSCEV)
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  // Note: the predicate Stride == 1 is added elsewhere; here we replace the
  // symbolic stride under that assumption.
  ScalarEvolution *SE = PSE.getSE();
  const SCEV *CT = SE->getOne(StrideSCEV->getType());
  PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
  const SCEV *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, const RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()),
      NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
  Members.push_back(Index);
}

/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
/// return nullptr. \p A and \p B must have the same type.
static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
                                     ScalarEvolution &SE) {
  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
    return nullptr;
  return SE.getAddExpr(A, B);
}

/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
/// return nullptr. \p A and \p B must have the same type.
static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
                                   ScalarEvolution &SE) {
  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
    return nullptr;
  return SE.getMulExpr(A, B);
}

/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR
/// at \p MaxBTC is guaranteed inbounds of the accessed object.
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
    const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
    ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
    AssumptionCache *AC,
    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
220 if (!StartPtr)
221 return false;
  bool CheckForNonNull, CheckForFreed;
  Value *StartPtrV = StartPtr->getValue();
  uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
      DL, CheckForNonNull, CheckForFreed);
227
228 if (DerefBytes && (CheckForNonNull || CheckForFreed))
229 return false;
230
233 const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
234
235
236 Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
237 if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
239 CtxI = LoopPred->getTerminator();
240 }
245 return false;
248 return false;
249 DerefRK = std::max(DerefRK, RK);
250 return true;
251 });
252 if (DerefRK) {
253 DerefBytesSCEV =
255 }
256
257 if (DerefBytesSCEV->isZero())
258 return false;
259
262 return false;
263
266
267
269 return false;
272
273 if (!LoopGuards)
276
277 const SCEV *OffsetAtLastIter =
279 if (!OffsetAtLastIter) {
280
281
284 return false;
286 MaxBTC, WiderTy);
287 OffsetAtLastIter =
289 if (!OffsetAtLastIter)
290 return false;
291 }
292
295 if (!OffsetEndBytes)
296 return false;
297
298 if (IsKnownNonNegative) {
299
300
301
303 if (!EndBytes)
304 return false;
305
306 DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);
308 }
309
310
311
312
316}

std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
    const SCEV *MaxBTC, ScalarEvolution *SE,
    DenseMap<std::pair<const SCEV *, Type *>,
             std::pair<const SCEV *, const SCEV *>> *PointerBounds,
    DominatorTree *DT, AssumptionCache *AC,
    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
  std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
  if (PointerBounds) {
    auto [Iter, Ins] = PointerBounds->insert(
        {{PtrExpr, AccessTy},
         {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
    if (!Ins)
      return Iter->second;
    PtrBoundsPair = &Iter->second;
  }
334
  const SCEV *ScStart;
  const SCEV *ScEnd;

  auto &DL = Lp->getHeader()->getDataLayout();
  Type *IdxTy = DL.getIndexType(PtrExpr->getType());
  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
  if (SE->isLoopInvariant(PtrExpr, Lp)) {
    ScStart = ScEnd = PtrExpr;
  } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
    ScStart = AR->getStart();
    if (!isa<SCEVCouldNotCompute>(BTC))
      // Evaluating AR at an exact BTC is safe: LAA separately checks that
      // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
      // the loop either triggers UB when executing a memory access with a
      // poison pointer or the wrapping/poisoned pointer is not used.
      ScEnd = AR->evaluateAtIteration(BTC, *SE);
351 else {
352
353
354
355
356
357
358
360 DT, AC, LoopGuards)) {
361 ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
362 } else {
367 AR->getType())));
368 }
369 }
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Expressions with a non-constant step: we cannot tell in which
      // direction the pointer moves, so conservatively cover both ends of
      // the range with umin/umax.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  } else
    return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};

  assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
  assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");

  // Add the size of the pointed element to ScEnd.
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
  if (PointerBounds)
    *PtrBoundsPair = Res;
  return Res;
}
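
// Editor's illustrative sketch (not part of the original source): a minimal
// standalone model of the [Low, High) interval computed above, with plain
// integers in place of SCEV expressions. The real code must additionally
// handle unknown trip counts and pointer wrapping.

#include <algorithm>
#include <cstdint>
#include <utility>

// Returns the byte interval [Low, High) touched by a strided access with the
// given base address, byte step, backedge-taken count and element size.
std::pair<int64_t, int64_t> accessBounds(int64_t Base, int64_t Step,
                                         int64_t BTC, int64_t EltSize) {
  int64_t Start = Base;                // address at iteration 0
  int64_t Last = Base + BTC * Step;    // address at the last iteration
  int64_t Low = std::min(Start, Last); // negative steps run downwards
  int64_t High = std::max(Start, Last) + EltSize; // one-past-the-end
  return {Low, High};
}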

/// Calculate Start and End points of memory access using
/// getStartAndEndForAccess.
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                    Type *AccessTy, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    PredicatedScalarEvolution &PSE,
                                    bool NeedsFreeze) {
  const SCEV *BTC = PSE.getBackedgeTakenCount();
  const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
  const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
      Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
      &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
  assert(!isa<SCEVCouldNotCompute>(ScStart) &&
         !isa<SCEVCouldNotCompute>(ScEnd) &&
         "must be able to compute both start and end expressions");
  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
                        NeedsFreeze);
}
417
bool RuntimePointerChecking::tryToCreateDiffCheck(
    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
  // If either group contains multiple different pointers, bail out.
  // TODO: Support multiple pointers by using the minimum or maximum pointer,
  // depending on src & sink.
  if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
    return false;
425
428
429
430
433 return false;
434
439
440
441 if (AccSrc.size() != 1 || AccSink.size() != 1)
442 return false;
443

  // If the sink is accessed before src, swap src/sink.
  if (AccSink[0] < AccSrc[0])
    std::swap(Src, Sink);
449 const SCEV *SrcStart;
450 const SCEV *SinkStart;
452 if ((Src->Expr,
455 (Sink->Expr,
458 return false;
459
467 return false;
468
470 unsigned AllocSize =
471 std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
472
473
474
475
477 return false;
478
482
483
486
491 return false;
492
493
494
495
496
501 const Loop *StartARLoop = SrcStartAR->getLoop();
502 if (StartARLoop == SinkStartAR->getLoop() &&
504
505
506
507 SrcStartAR->getStepRecurrence(*SE) !=
508 SinkStartAR->getStepRecurrence(*SE)) {
509 LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
510 "cannot be hoisted out of the outer loop\n");
511 return false;
512 }
513 }
514
515 LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
516 << "SrcStart: " << *SrcStartInt << '\n'
517 << "SinkStartInt: " << *SinkStartInt << '\n');
518 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
519 Src->NeedsFreeze || Sink->NeedsFreeze);
520 return true;
521}

SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
  SmallVector<RuntimePointerCheck, 4> Checks;
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ)) {
        CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
        Checks.emplace_back(&CGI, &CGJ);
      }
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (const auto &I : M.Members)
    for (const auto &J : N.Members)
      if (needsChecking(I, J))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
  if (!Diff)
    return nullptr;
  return Diff->isNegative() ? J : I;
}

bool RuntimeCheckingPtrGroup::addPointer(
    unsigned Index, const RuntimePointerChecking &RtCheck) {
  return addPointer(
      Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
      RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
      RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         bool NeedsFreeze,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memory checks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  this->NeedsFreeze |= NeedsFreeze;
  return true;
}
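
// Editor's illustrative sketch (not part of the original source): a
// plain-integer model of the merge performed by addPointer above. Merging a
// pointer's [Start, End) into a group widens the group's [Low, High) bounds;
// in the real code the min/max comparisons go through getMinFromExprs and can
// fail when the SCEV difference is not a known constant.

#include <algorithm>
#include <cstdint>

struct Interval {
  int64_t Low, High;
};

// Widen Group to also cover [Start, End).
void addToGroup(Interval &Group, int64_t Start, int64_t End) {
  Group.Low = std::min(Group.Low, Start);
  Group.High = std::max(Group.High, End);
}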

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency-candidate equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers.
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memory check. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.
  //
  // We use the following (greedy) algorithm to construct the groups.
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.
  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object with a
  // non-constant difference, we shouldn't perform any pointer grouping with
  // those pointers, because we can easily get into cases where the resulting
  // check would return false even when the accesses are safe. In that case
  // (and when dependency information is not available in general), create a
  // separate group for each pointer.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.emplace_back(I, *this);
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue].push_back(Index);

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and it's already been assigned to a
    // checking group.
    if (Seen.contains(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto M : DepCands.members(Access)) {
      auto PointerI = PositionMap.find(M.getPointer());
      // If we can't find the pointer in PositionMap that means we didn't
      // create a CheckPtr for it.
      if (PointerI == PositionMap.end())
        continue;
      for (unsigned Pointer : PointerI->second) {
        bool Merged = false;
        // Mark this pointer as seen.
        Seen.insert(Pointer);

        // Go through all the existing sets and see if we can find one
        // which can include this pointer.
        for (RuntimeCheckingPtrGroup &Group : Groups) {
          // Don't perform more than a certain amount of comparisons.
          // This should limit the cost of grouping the pointers to something
          // reasonable.  If we do end up hitting this threshold, the algorithm
          // will create separate groups for all remaining pointers.
          if (TotalComparisons > MemoryCheckMergeThreshold)
            break;

          TotalComparisons++;

          if (Group.addPointer(Pointer, *this)) {
            Merged = true;
            break;
          }
        }

        if (!Merged)
          // We couldn't add this pointer to any existing set or the threshold
          // for the number of comparisons has been reached. Create a new group
          // to hold the current pointer.
          Groups.emplace_back(Pointer, *this);
      }
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::append_range(CheckingGroups, Groups);
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  return PointerI.AliasSetId == PointerJ.AliasSetId;
}

/// Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
static DenseMap<const RuntimeCheckingPtrGroup *, unsigned>
getPtrToIdxMap(ArrayRef<RuntimeCheckingPtrGroup> CheckingGroups) {
  DenseMap<const RuntimeCheckingPtrGroup *, unsigned> PtrIndices;
  for (const auto &[Idx, CG] : enumerate(CheckingGroups))
    PtrIndices[&CG] = Idx;
  return PtrIndices;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  auto PtrIndices = getPtrToIdxMap(CheckingGroups);
  for (const auto &[Check1, Check2] : Checks) {
    const auto &First = Check1->Members, &Second = Check2->Members;
    OS.indent(Depth) << "Check " << N++ << ":\n";
    OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)
                         << ":\n";
    for (unsigned K : First)
      OS.indent(Depth + 4) << *Pointers[K].PointerValue << "\n";
    OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)
                         << ":\n";
    for (unsigned K : Second)
      OS.indent(Depth + 4) << *Pointers[K].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  auto PtrIndices = getPtrToIdxMap(CheckingGroups);
  for (const auto &CG : CheckingGroups) {
    OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned Member : CG.Members) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
    }
  }
}
796
797namespace {
798
799
800
801
802
803class AccessAnalysis {
804public:
805
808
813 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
814 PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
815
816 BAA.enableCrossIterationMode();
817 }

  /// Register a load and whether it is only read from.
  void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(adjustLoc(Loc));
    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(const MemoryLocation &Loc, Type *AccessTy) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(adjustLoc(Loc));
    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
  }
834
835
836
837
838
839
840
841
842 bool createCheckForAccess(RuntimePointerChecking &RtCheck,
843 MemAccessInfo Access, Type *AccessTy,
844 const DenseMap<Value *, const SCEV *> &Strides,
845 DenseMap<Value *, unsigned> &DepSetId,
846 Loop *TheLoop, unsigned &RunningDepId,
847 unsigned ASId, bool Assume);
848
849
850
851
852
853
854
855
856
857 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
858 const DenseMap<Value *, const SCEV *> &Strides,
859 Value *&UncomputablePtr, bool AllowPartial);
860
861
862
863 void buildDependenceSets() {
864 processMemAccesses();
865 }
866
867
868
869
870
871
872 bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
873
874
875 void resetDepChecks(MemoryDepChecker &DepChecker) {
876 CheckDeps.clear();
878 }
879
880 const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; }
881
882private:
883 typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

  /// Adjust the MemoryLocation so that it represents accesses to this
  /// location across all iterations, rather than a single one.
  MemoryLocation adjustLoc(MemoryLocation Loc) const {
    // The accessed location varies within the loop, but remains within the
    // underlying object.
    Loc.Size = LocationSize::beforeOrAfterPointer();
    Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
    Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
    return Loc;
  }

  /// Drop alias scopes that are only valid within a single loop iteration.
  MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
    if (!ScopeList)
      return nullptr;

    // For the sake of simplicity, drop the list if any of the scopes is made
    // loop-variant.
    if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
          return LoopAliasScopes.contains(cast<MDNode>(Scope));
        }))
      return nullptr;

    return ScopeList;
  }
910
911
912
913 void processMemAccesses();
914
915
916
918
919
920 const Loop *TheLoop;
921
922
923 MemAccessInfoList CheckDeps;
924
925
926 SmallPtrSet<Value*, 16> ReadOnlyPtr;
927
928
929 BatchAAResults BAA;
930
931
932
933 AliasSetTracker AST;
934
935
936 const LoopInfo *LI;
937
938
939 DominatorTree &DT;
940
941
942
943
945
946
947
948
949
950
951
952
953 bool IsRTCheckAnalysisNeeded = false;
954
955
956 PredicatedScalarEvolution &PSE;
957
958 DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
959
960
961
962 SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
963};
964
965}
966
967
968
969static std::optional<int64_t>
973 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
974 << "\n");
975 return std::nullopt;
976 }
977
978
979 if (Lp != AR->getLoop()) {
981 dbgs() << "LAA: Bad stride - Not striding over innermost loop ";
982 if (Ptr)
983 dbgs() << *Ptr << " ";
984
985 dbgs() << "SCEV: " << *AR << "\n";
986 });
987 return std::nullopt;
988 }
989
990
992
993
994 const APInt *APStepVal;
997 dbgs() << "LAA: Bad stride - Not a constant strided ";
998 if (Ptr)
999 dbgs() << *Ptr << " ";
1000 dbgs() << "SCEV: " << *AR << "\n";
1001 });
1002 return std::nullopt;
1003 }

  const auto &DL = Lp->getHeader()->getDataLayout();
  TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
  int64_t Size = AllocSize.getFixedValue();

  // Give up if the step does not fit in an int64_t.
  std::optional<int64_t> StepVal = APStepVal->trySExtValue();
  if (!StepVal)
    return std::nullopt;

  // Strided access: return the step in units of the access size.
  return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);
}
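
// Editor's illustrative sketch (not part of the original source): the tail of
// getStrideFromAddRec converts a byte step into a stride in units of the
// access type, rejecting steps that are not a whole multiple of the access
// size. A standalone model:

#include <cstdint>
#include <optional>

std::optional<int64_t> strideInElements(int64_t StepBytes, int64_t EltBytes) {
  if (StepBytes % EltBytes != 0)
    return std::nullopt;       // e.g. a 6-byte step over 4-byte elements
  return StepBytes / EltBytes; // e.g. an 8-byte step over i32 -> stride 2
}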
1017
1018
1019
1021 Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
1023 std::optional<int64_t> Stride = std::nullopt) {
1024
1026 return true;
1027
1029 return true;
1030
1031
1032
1033
1034
1035
1037 GEP && GEP->hasNoUnsignedSignedWrap()) {
1038
1039
    if (L->getHeader() == L->getLoopLatch() ||
        all_of(GEP->users(), [&](User *U) {
          if (getLoadStorePointerOperand(U) != GEP)
            return false;
          BasicBlock *UserBB = cast<Instruction>(U)->getParent();
          return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);
        }))
      return true;
1048 }
1049
1050 if (!Stride)
1052 if (Stride) {
1053
1054
1055
1058 (Stride == 1 || Stride == -1))
1059 return true;
1060 }
1061
1062 if (Ptr && Assume) {
1065 << "LAA: Pointer: " << *Ptr << "\n"
1066 << "LAA: SCEV: " << *AR << "\n"
1067 << "LAA: Added an overflow assumption\n");
1068 return true;
1069 }
1070
1071 return false;
1072}

static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *> WorkList;
  WorkList.push_back(StartPtr);

  while (!WorkList.empty()) {
    Value *Ptr = WorkList.pop_back_val();
    if (!Visited.insert(Ptr).second)
      continue;
    auto *PN = dyn_cast<PHINode>(Ptr);
    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    if (PN && InnermostLoop.contains(PN->getParent()) &&
        PN->getParent() != InnermostLoop.getHeader()) {
      llvm::append_range(WorkList, PN->incoming_values());
    } else
      AddPointer(Ptr);
  }
}
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1116 unsigned Depth) {
1117
1118
1119
1120
1125 return;
1126 }
1127
1129
1132 };
1133
  auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
    switch (Opcode) {
    case Instruction::Add:
      return SE->getAddExpr(L, R);
    case Instruction::Sub:
      return SE->getMinusSCEV(L, R);
    default:
      llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
    }
  };
1144
1146 unsigned Opcode = I->getOpcode();
1147 switch (Opcode) {
1148 case Instruction::GetElementPtr: {
1150 Type *SourceTy = GEP->getSourceElementType();
1151
1152
1153 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
1155 break;
1156 }
1161
1162
1163 bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
1164 any_of(OffsetScevs, UndefPoisonCheck);
1165
1166
1167
1168
1169 if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
1170 BaseScevs.push_back(BaseScevs[0]);
1171 else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
1172 OffsetScevs.push_back(OffsetScevs[0]);
1173 else {
1174 ScevList.emplace_back(Scev, NeedsFreeze);
1175 break;
1176 }
1177
1179
1180
1181
1182
1184
1185 for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {
1188
1189
1193 }
1194 break;
1195 }
1196 case Instruction::Select: {
1198
1199
1200
1203 if (ChildScevs.size() == 2)
1205 else
1207 break;
1208 }
1209 case Instruction::PHI: {
1211
1212
1213
1214 if (I->getNumOperands() == 2) {
1217 }
1218 if (ChildScevs.size() == 2)
1220 else
1222 break;
1223 }
1224 case Instruction::Add:
1225 case Instruction::Sub: {
1230
1231
1232 bool NeedsFreeze =
1233 any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
1234
1235
1236
1237
1238 if (LScevs.size() == 2 && RScevs.size() == 1)
1240 else if (RScevs.size() == 2 && LScevs.size() == 1)
1242 else {
1243 ScevList.emplace_back(Scev, NeedsFreeze);
1244 break;
1245 }
1246
1247 for (auto [L, R] : zip(LScevs, RScevs))
1248 ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),
1249 NeedsFreeze);
1250 break;
1251 }
1252 default:
1253
1254 LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1256 break;
1257 }
1258}
1259
1260bool AccessAnalysis::createCheckForAccess(
1264 unsigned &RunningDepId, unsigned ASId, bool Assume) {
1268
1272 "Must have some runtime-check pointer candidates");
1273
1274
1275
1276 auto IsLoopInvariantOrAR =
1280 };
1281 if (RTCheckPtrs.size() == 2 && all_of(RTCheckPtrs, IsLoopInvariantOrAR)) {
1282 LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n";
1283 for (const auto &[Idx, Q] : enumerate(RTCheckPtrs)) dbgs()
1284 << "\t(" << Idx << ") " << *Q.getPointer() << "\n");
1285 } else {
1287 }
1288
1289
1290
1291 for (auto &P : RTCheckPtrs) {
1292
1294 continue;
1295
1297 if (!AR && Assume)
1300 return false;
1301
1302
1303
1304 if (RTCheckPtrs.size() == 1) {
1305 AR =
1307 P.setPointer(AR);
1308 }
1309
1310 if ((PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
1311 TheLoop, Assume, DT))
1312 return false;
1313 }
1314
1315 for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) {
1316
1317 unsigned DepId;
1318
1319 if (isDependencyCheckNeeded()) {
1321 unsigned &LeaderId = DepSetId[Leader];
1322 if (!LeaderId)
1323 LeaderId = RunningDepId++;
1324 DepId = LeaderId;
1325 } else
1326
1327 DepId = RunningDepId++;
1328
1329 bool IsWrite = Access.getInt();
1330 RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1331 NeedsFreeze);
1332 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
1333 }
1334
1335 return true;
1336}
1337
1338bool AccessAnalysis::canCheckPtrAtRT(
1341 bool AllowPartial) {
1342
1343
1344 bool CanDoRT = true;
1345
1346 bool MayNeedRTCheck = false;
1347 if (!IsRTCheckAnalysisNeeded) return true;
1348
1349 bool IsDepCheckNeeded = isDependencyCheckNeeded();
1350
1351
1352
1353 unsigned ASId = 0;
1354 for (const auto &AS : AST) {
1355 int NumReadPtrChecks = 0;
1356 int NumWritePtrChecks = 0;
1357 bool CanDoAliasSetRT = true;
1358 ++ASId;
1359 auto ASPointers = AS.getPointers();
1360
1361
1362
1363 unsigned RunningDepId = 1;
1365
1367
1368
1369
1371 for (const Value *ConstPtr : ASPointers) {
1372 Value *Ptr = const_cast<Value *>(ConstPtr);
1373 bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));
1374 if (IsWrite)
1375 ++NumWritePtrChecks;
1376 else
1377 ++NumReadPtrChecks;
1379 }
1380
1381
1382
1383 if (NumWritePtrChecks == 0 ||
1384 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1385 assert((ASPointers.size() <= 1 ||
1387 [this](const Value *Ptr) {
1388 MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1389 true);
1390 return !DepCands.contains(AccessWrite);
1391 })) &&
1392 "Can only skip updating CanDoRT below, if all entries in AS "
1393 "are reads or there is at most 1 entry");
1394 continue;
1395 }
1396
1397 for (auto &Access : AccessInfos) {
1399 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1400 DepSetId, TheLoop, RunningDepId, ASId,
1401 false)) {
1402 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1403 << *Access.getPointer() << '\n');
1405 CanDoAliasSetRT = false;
1406 }
1407 }
1408 }
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1420
1421
1422
1423 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1424
1425
1426
1427 CanDoAliasSetRT = true;
1428 for (const auto &[Access, AccessTy] : Retries) {
1429 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1430 DepSetId, TheLoop, RunningDepId, ASId,
1431 true)) {
1432 CanDoAliasSetRT = false;
1433 UncomputablePtr = Access.getPointer();
1434 if (!AllowPartial)
1435 break;
1436 }
1437 }
1438 }
1439
1440 CanDoRT &= CanDoAliasSetRT;
1441 MayNeedRTCheck |= NeedsAliasSetRTCheck;
1442 ++ASId;
1443 }
1444
1445
1446
1447
1448
1449
1450 unsigned NumPointers = RtCheck.Pointers.size();
1451 for (unsigned i = 0; i < NumPointers; ++i) {
1452 for (unsigned j = i + 1; j < NumPointers; ++j) {
1453
1454 if (RtCheck.Pointers[i].DependencySetId ==
1455 RtCheck.Pointers[j].DependencySetId)
1456 continue;
1457
1458 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
1459 continue;
1460
1463
1466 if (ASi != ASj) {
1468 dbgs() << "LAA: Runtime check would require comparison between"
1469 " different address spaces\n");
1470 return false;
1471 }
1472 }
1473 }
1474
1475 if (MayNeedRTCheck && (CanDoRT || AllowPartial))
1477
1479 << " pointer comparisons.\n");
1480
1481
1482
1483
1485
1486 bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1487 assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
1488 "CanDoRTIfNeeded depends on RtCheck.Need");
1489 if (!CanDoRTIfNeeded && !AllowPartial)
1490 RtCheck.reset();
1491 return CanDoRTIfNeeded;
1492}
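
// Editor's illustrative sketch (not part of the original source): the
// filtering performed by canCheckPtrAtRT/needsChecking above condenses to one
// predicate over per-pointer metadata. Pointers proven dependence-safe share
// a dependency set; pointers that may alias share an alias set; two reads
// never need a check.

#include <cstdint>

struct PtrCheckInfo {
  bool IsWrite;
  unsigned DependencySetId;
  unsigned AliasSetId;
};

bool needsRuntimeCheck(const PtrCheckInfo &A, const PtrCheckInfo &B) {
  if (!A.IsWrite && !B.IsWrite)
    return false; // two reads cannot conflict
  if (A.DependencySetId == B.DependencySetId)
    return false; // dependence already proven safe
  return A.AliasSetId == B.AliasSetId; // only same alias set may overlap
}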
1493
1494void AccessAnalysis::processMemAccesses() {
1495
1496
1497
1498
1499 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1504 dbgs() << "\t" << *A.getPointer() << " ("
1505 << (A.getInt()
1506 ? "write"
1507 : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"
1508 : "read"))
1509 << ")\n";
1510 });
1511
1512
1513
1514
1515
1516 for (const auto &AS : AST) {
1517
1518
1519
1520 auto ASPointers = AS.getPointers();
1521
1522 bool SetHasWrite = false;
1523
1524
1525
1527 UnderlyingObjToAccessMap;
1528 UnderlyingObjToAccessMap ObjToLastAccess;
1529
1530
1531 PtrAccessMap DeferredAccesses;
1532
1533
1534
1535 for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
1536 bool UseDeferred = SetIteration > 0;
1537 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1538
1539 for (const Value *ConstPtr : ASPointers) {
1540 Value *Ptr = const_cast<Value *>(ConstPtr);
1541
1542
1543
1544 for (const auto &[AC, _] : S) {
1545 if (AC.getPointer() != Ptr)
1546 continue;
1547
1548 bool IsWrite = AC.getInt();
1549
1550
1551
1552 bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;
1553 if (UseDeferred && !IsReadOnlyPtr)
1554 continue;
1555
1556
1557 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1558 S.contains(MemAccessInfo(Ptr, false))) &&
1559 "Alias-set pointer not in the access set?");
1560
1561 MemAccessInfo Access(Ptr, IsWrite);
1563
1564
1565
1566
1567
1568
1569 if (!UseDeferred && IsReadOnlyPtr) {
1570
1571
1572 DeferredAccesses.insert({Access, {}});
1573 continue;
1574 }
1575
1576
1577
1578
1579
1580 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
1581 CheckDeps.push_back(Access);
1582 IsRTCheckAnalysisNeeded = true;
1583 }
1584
1585 if (IsWrite)
1586 SetHasWrite = true;
1587
1588
1589
1591 UOs = {};
1594 << "Underlying objects for pointer " << *Ptr << "\n");
1595 for (const Value *UnderlyingObj : UOs) {
1596
1597
1602 continue;
1603
1604 auto [It, Inserted] = ObjToLastAccess.try_emplace(
1605 {UnderlyingObj,
1608 if (!Inserted) {
1610 It->second = Access;
1611 }
1612
1613 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
1614 }
1615 }
1616 }
1617 }
1618 }
1619}
1620
1621
1622std::optional<int64_t>
1626 bool Assume, bool ShouldCheckWrap) {
1629 return 0;
1630
1632
1634 if (Assume && !AR)
1636
1637 if (!AR) {
1638 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1639 << " SCEV: " << *PtrScev << "\n");
1640 return std::nullopt;
1641 }
1642
1643 std::optional<int64_t> Stride =
1645 if (!ShouldCheckWrap || !Stride)
1646 return Stride;
1647
1648 if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))
1649 return Stride;
1650
1652 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1653 << *Ptr << " SCEV: " << *AR << "\n");
1654 return std::nullopt;
1655}
1656
1661 bool StrictCheck, bool CheckType) {
1662 assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1663
1664
1665 if (PtrA == PtrB)
1666 return 0;
1667
1668
1669 if (CheckType && ElemTyA != ElemTyB)
1670 return std::nullopt;
1671
1674
1675
1676 if (ASA != ASB)
1677 return std::nullopt;
1678 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1679
1680 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1682 DL, OffsetA, true);
1684 DL, OffsetB, true);
1685
1686 std::optional<int64_t> Val;
1687 if (PtrA1 == PtrB1) {
1688
1689
1692
1693 if (ASA != ASB)
1694 return std::nullopt;
1695
1696 IdxWidth = DL.getIndexSizeInBits(ASA);
1697 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1698 OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1699
1700 OffsetB -= OffsetA;
1702 } else {
1703
1704 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1705 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1706 std::optional Diff =
1708 if (!Diff)
1709 return std::nullopt;
1710 Val = Diff->trySExtValue();
1711 }
1712
1713 if (!Val)
1714 return std::nullopt;
1715
1716 int64_t Size = DL.getTypeStoreSize(ElemTyA);
1717 int64_t Dist = *Val / Size;
1718
1719
1720
1721 if (!StrictCheck || Dist * Size == Val)
1722 return Dist;
1723 return std::nullopt;
1724}
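
// Editor's illustrative sketch (not part of the original source): a
// plain-integer model of the tail of getPointersDiff above. Once both
// pointers are rebased to a common object, the element distance is the byte
// delta divided by the element size, optionally required to divide evenly.

#include <cstdint>
#include <optional>

std::optional<int64_t> pointersDiff(int64_t OffA, int64_t OffB,
                                    int64_t EltSize, bool StrictCheck) {
  int64_t Val = OffB - OffA;
  int64_t Dist = Val / EltSize;
  if (!StrictCheck || Dist * EltSize == Val)
    return Dist;
  return std::nullopt; // distance is not a whole number of elements
}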
1725
1730 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1731 "Expected list of pointer operands.");
1732
1733
1734 Value *Ptr0 = VL[0];
1735
1736 using DistOrdPair = std::pair<int64_t, unsigned>;
1738 std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1739 Offsets.emplace(0, 0);
1740 bool IsConsecutive = true;
1742 std::optional<int64_t> Diff =
1744 true);
1745 if (!Diff)
1746 return false;
1747
1748
1749 int64_t Offset = *Diff;
1750 auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
1751 if (!IsInserted)
1752 return false;
1753
1754 IsConsecutive &= std::next(It) == Offsets.end();
1755 }
1756 SortedIndices.clear();
1757 if (!IsConsecutive) {
1758
1760 for (auto [Idx, Off] : enumerate(Offsets))
1761 SortedIndices[Idx] = Off.second;
1762 }
1763 return true;
1764}
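
// Editor's illustrative sketch (not part of the original source):
// sortPtrAccesses above amounts to ordering pointers by their element
// distance from the first pointer while rejecting duplicate offsets. A
// standalone model, with the offsets assumed precomputed (the consecutiveness
// bookkeeping of the real code is omitted):

#include <cstdint>
#include <map>
#include <vector>

bool sortByOffset(const std::vector<int64_t> &Offsets,
                  std::vector<size_t> &SortedIndices) {
  std::map<int64_t, size_t> ByOffset; // ordered by offset, as std::set above
  for (size_t Idx = 0; Idx != Offsets.size(); ++Idx)
    if (!ByOffset.emplace(Offsets[Idx], Idx).second)
      return false; // duplicate offset: bail out, as the real code does
  SortedIndices.clear();
  for (const auto &[Off, Idx] : ByOffset)
    SortedIndices.push_back(Idx);
  return true;
}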
1765
1766
1771 if (!PtrA || !PtrB)
1772 return false;
1775 std::optional<int64_t> Diff =
1777 true, CheckType);
1778 return Diff == 1;
1779}

void MemoryDepChecker::addAccess(StoreInst *SI) {
  visitPointers(SI->getPointerOperand(), *InnermostLoop,
                [this, SI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                  InstMap.push_back(SI);
                  ++AccessIdx;
                });
}

void MemoryDepChecker::addAccess(LoadInst *LI) {
  visitPointers(LI->getPointerOperand(), *InnermostLoop,
                [this, LI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                  InstMap.push_back(LI);
                  ++AccessIdx;
                });
}

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
  case IndirectUnsafe:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
  case IndirectUnsafe:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case IndirectUnsafe:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}
1855
1856bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1858 unsigned CommonStride) {
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1872
1873 uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
1875 MaxStoreLoadForwardSafeDistanceInBits);
1876
1877
1878 for (uint64_t VF = 2 * TypeByteSize;
1879 VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
1880
1881
1882 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1883 MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
1884 break;
1885 }
1886 }
1887
1888 if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
1890 dbgs() << "LAA: Distance " << Distance
1891 << " that could cause a store-load forwarding conflict\n");
1892 return true;
1893 }
1894
1895 if (CommonStride &&
1896 MaxVFWithoutSLForwardIssuesPowerOf2 <
1897 MaxStoreLoadForwardSafeDistanceInBits &&
1898 MaxVFWithoutSLForwardIssuesPowerOf2 !=
1900 uint64_t MaxVF =
1901 bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
1902 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1903 MaxStoreLoadForwardSafeDistanceInBits =
1904 std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
1905 }
1906 return false;
1907}
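
// Editor's illustrative sketch (not part of the original source): the core
// loop of couldPreventStoreLoadForward above, modeled standalone. It finds
// the largest power-of-2 vector width in bytes for which a store->load
// distance does not hit the store-to-load-forwarding penalty, using the same
// "8 * TypeByteSize iterations through memory" threshold.

#include <cstdint>

uint64_t maxSafeVFBytes(uint64_t Distance, uint64_t TypeByteSize,
                        uint64_t MaxVFBytes) {
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  uint64_t MaxSafe = MaxVFBytes;
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxSafe; VF *= 2) {
    // A conflicting (non-VF-multiple) distance that is reused too soon caps
    // the safe width at the previous power of 2.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxSafe = VF >> 1;
      break;
    }
  }
  return MaxSafe;
}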
1908
1910 if (Status < S)
1911 Status = S;
1912}
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1927 const SCEV &MaxBTC, const SCEV &Dist,
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1949
1950 const SCEV *CastedDist = &Dist;
1951 const SCEV *CastedProduct = Product;
1952 uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
1953 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1954
1955
1956
1957
1958 if (DistTypeSizeBits > ProductTypeSizeBits)
1960 else
1962
1963
1964
1967 return true;
1968
1969
1970
1974}
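
// Editor's illustrative sketch (not part of the original source):
// isSafeDependenceDistance above proves independence via the predicate
// (**) |Dist| > MaxBTC * Step, i.e. the dependence distance exceeds the
// total number of bytes either access can sweep over the whole loop. With
// concrete integers the check is just:

#include <cstdint>

bool safeDependenceDistance(uint64_t AbsDist, uint64_t StepBytes,
                            uint64_t MaxBTC) {
  return AbsDist > MaxBTC * StepBytes;
}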
1975
1976
1977
1978
1979
1980
1983 assert(Stride > 1 && "The stride must be greater than 1");
1984 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1985 assert(Distance > 0 && "The distance must be non-zero");
1986
1987
1988 if (Distance % TypeByteSize)
1989 return false;
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007 return Distance % Stride;
2008}
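
// Editor's illustrative sketch (not part of the original source), mirroring
// areStridedAccessesIndependent above with all quantities in bytes: two
// accesses with the same stride are provably independent when their distance
// is a multiple of the element size but not a multiple of the stride, so the
// two streams interleave without ever landing on the same address. E.g. for
// "for (i = 0; i < n; i += 4) A[i+2] = A[i] + 1" with 4-byte elements:
// Distance = 8, Stride = 16, 8 % 16 != 0 -> independent.

#include <cstdint>

bool stridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                uint64_t TypeByteSize) {
  if (Distance % TypeByteSize) // distance not aligned to the element size
    return false;              // cannot prove independence this way
  return Distance % Stride != 0;
}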
2009
2010bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
2011 Type *SrcTy,
2012 const SCEV *Sink,
2013 Type *SinkTy) {
2014 const SCEV *BTC = PSE.getBackedgeTakenCount();
2015 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
2016 ScalarEvolution &SE = *PSE.getSE();
2017 const auto &[SrcStart_, SrcEnd_] =
2019 &SE, &PointerBounds, DT, AC, LoopGuards);
2021 return false;
2022
2023 const auto &[SinkStart_, SinkEnd_] =
2025 &SE, &PointerBounds, DT, AC, LoopGuards);
2028 return false;
2029
2030 if (!LoopGuards)
2032
2034 auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
2036 return true;
2037
2038 auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
2039 auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
2041}
2042
2044 MemoryDepChecker::DepDistanceStrideAndSizeInfo>
2045MemoryDepChecker::getDependenceDistanceStrideAndSize(
2046 const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
2047 const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
2048 const auto &DL = InnermostLoop->getHeader()->getDataLayout();
2049 auto &SE = *PSE.getSE();
2050 const auto &[APtr, AIsWrite] = A;
2051 const auto &[BPtr, BIsWrite] = B;
2052
2053
2054 if (!AIsWrite && !BIsWrite)
2056
2059
2060
2061 if (APtr->getType()->getPointerAddressSpace() !=
2062 BPtr->getType()->getPointerAddressSpace())
2064
2065 std::optional<int64_t> StrideAPtr = getPtrStride(
2066 PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);
2067 std::optional<int64_t> StrideBPtr = getPtrStride(
2068 PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);
2069
2070 const SCEV *Src = PSE.getSCEV(APtr);
2071 const SCEV *Sink = PSE.getSCEV(BPtr);
2072
2073
2074
2075
2076 if (StrideAPtr && *StrideAPtr < 0) {
2080 std::swap(StrideAPtr, StrideBPtr);
2081 }
2082
2083 const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
2084
2085 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
2086 << "\n");
2087 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
2088 << ": " << *Dist << "\n");
2089
2090
2091
2092
2093
2094
2095
2096
2097 if (!StrideAPtr || !StrideBPtr) {
2098 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
2100 }
2101
2102 int64_t StrideAPtrInt = *StrideAPtr;
2103 int64_t StrideBPtrInt = *StrideBPtr;
2104 LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
2105 << " Sink induction step: " << StrideBPtrInt << "\n");
2106
2107
2108 if (!StrideAPtrInt || !StrideBPtrInt)
2110
2111
2112
2113 if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
2115 dbgs() << "Pointer access with strides in different directions\n");
2117 }
2118
2119 TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
2120 TypeSize BStoreSz = DL.getTypeStoreSize(BTy);
2121
2122
2123
2124 uint64_t ASz = DL.getTypeAllocSize(ATy);
2125 uint64_t BSz = DL.getTypeAllocSize(BTy);
2126 uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
2127
2128 uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
2129 uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
2130
2131 uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);
2132
2133 std::optional<uint64_t> CommonStride;
2134 if (StrideAScaled == StrideBScaled)
2135 CommonStride = StrideAScaled;
2136
2137
2138
2140 ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
2141
2142
2144 LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");
2146 }
2147
2148 return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2149 TypeByteSize, AIsWrite, BIsWrite);
2150}
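
// Editor's illustrative sketch (not part of the original source): the tail
// of getDependenceDistanceStrideAndSize scales strides from element units to
// bytes before comparing them, so e.g. stride 1 over i64 and stride 2 over
// i32 both scale to 8 bytes and count as a common stride. Standalone model:

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <optional>

struct StrideInfo {
  uint64_t MaxStride;
  std::optional<uint64_t> CommonStride;
};

StrideInfo scaleStrides(int64_t StrideA, uint64_t SizeA, int64_t StrideB,
                        uint64_t SizeB) {
  uint64_t StrideAScaled = std::abs(StrideA) * SizeA;
  uint64_t StrideBScaled = std::abs(StrideB) * SizeB;
  StrideInfo R{std::max(StrideAScaled, StrideBScaled), std::nullopt};
  if (StrideAScaled == StrideBScaled)
    R.CommonStride = StrideAScaled;
  return R;
}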
2151
2153MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2155 assert(AIdx < BIdx && "Must pass arguments in program order");
2156
2157
2158
2159
2160 auto CheckCompletelyBeforeOrAfter = [&]() {
2161 auto *APtr = A.getPointer();
2162 auto *BPtr = B.getPointer();
2165 const SCEV *Src = PSE.getSCEV(APtr);
2166 const SCEV *Sink = PSE.getSCEV(BPtr);
2167 return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);
2168 };
2169
2170
2171
2172 auto Res =
2173 getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
  if (std::holds_alternative<Dependence::DepType>(Res)) {
    if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
        CheckCompletelyBeforeOrAfter())
      return Dependence::NoDep;
    return std::get<Dependence::DepType>(Res);
  }

  auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
      std::get<DepDistanceStrideAndSizeInfo>(Res);
2183 bool HasSameSize = TypeByteSize > 0;
2184
2185 ScalarEvolution &SE = *PSE.getSE();
2186 auto &DL = InnermostLoop->getHeader()->getDataLayout();
2187
2188
2189
2190
2191
2192
2193 if (HasSameSize &&
2195 DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
2197
2198
2199
2200 const APInt *APDist = nullptr;
2201 uint64_t ConstDist =
2203
2204
2205 if (APDist) {
2206
2207
2208 if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
2210 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2212 }
2213 } else {
2214 if (!LoopGuards)
2215 LoopGuards.emplace(
2218 }
2219
2220
2223 if (HasSameSize) {
2224
2226 }
2227 LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2228 "different type sizes\n");
2230 }
2231
2232 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2233
2234
2235
2236
2237
2238
2239
2240
2242 if (!ConstDist) {
2245 }
2246 if (!HasSameSize ||
2247 couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
2249 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2251 }
2252 }
2253
2254 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2256 }
2257
2259
2260 if (MinDistance <= 0) {
2263 }
2264
2265 if (!HasSameSize) {
2266 if (CheckCompletelyBeforeOrAfter())
2268 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2269 "different type sizes\n");
2271 }
2272
2277
2278 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313 uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
2314 if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
2315 if (!ConstDist) {
2316
2317
2318
2319
2322 }
2323 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
2324 << MinDistance << '\n');
2326 }
2327
2328
2329
2330 if (MinDistanceNeeded > MinDepDistBytes) {
2331 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2332 << MinDistanceNeeded << " size in bytes\n");
2334 }
2335
2336 MinDepDistBytes =
2337 std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
2338
2339 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2341 couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
2343
2344 uint64_t MaxVF = MinDepDistBytes / MaxStride;
2345 LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
2346 << " with max VF = " << MaxVF << '\n');
2347
2348 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2349 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2350
2351
2352
2353
2356 }
2357
2358 if (CheckCompletelyBeforeOrAfter())
2360
2361 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2363}
2364
2367
2368 MinDepDistBytes = -1;
2371 if (Visited.contains(CurAccess))
2372 continue;
2373
2374
2379
2380
2381 while (AI != AE) {
2382 Visited.insert(*AI);
2383 bool AIIsWrite = AI->getInt();
2384
2385
2387 (AIIsWrite ? AI : std::next(AI));
2388 while (OI != AE) {
2389
          // Iterate over each index of the same underlying access.
          auto &Acc = Accesses[*AI];
          for (std::vector<unsigned>::iterator I1 = Acc.begin(),
                                               I1E = Acc.end();
               I1 != I1E; ++I1)
            // Iterate over the remaining indices (or over the other access's
            // indices when AI != OI).
            for (std::vector<unsigned>::iterator
                     I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
                     I2E = (OI == AI ? I1E : Accesses[*OI].end());
                 I2 != I2E; ++I2) {
              auto A = std::make_pair(&*AI, *I1);
              auto B = std::make_pair(&*OI, *I2);
2401
2403 if (*I1 > *I2)
2405
2407 isDependent(*A.first, A.second, *B.first, B.second);
2409
2410
2411
2412
2413
2414 if (RecordDependences) {
2416 Dependences.emplace_back(A.second, B.second, Type);
2417
2419 RecordDependences = false;
2420 Dependences.clear();
2422 << "Too many dependences, stopped recording\n");
2423 }
2424 }
2426 return false;
2427 }
2428 ++OI;
2429 }
2430 ++AI;
2431 }
2432 }
2433
2434 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2436}
2437
2441 auto I = Accesses.find(Access);
2443 if (I != Accesses.end()) {
2444 transform(I->second, std::back_inserter(Insts),
2445 [&](unsigned Idx) { return this->InstMap[Idx]; });
2446 }
2447
2448 return Insts;
2449}
2450
2452 "NoDep",
2453 "Unknown",
2454 "IndirectUnsafe",
2455 "Forward",
2456 "ForwardButPreventsForwarding",
2457 "Backward",
2458 "BackwardVectorizable",
2459 "BackwardVectorizableButPreventsForwarding"};
2460
2468
2469bool LoopAccessInfo::canAnalyzeLoop() {
  // We can only analyze innermost loops.
  LLVM_DEBUG(dbgs() << "LAA: Checking a loop in '"
                    << TheLoop->getHeader()->getParent()->getName()
                    << "' from " << TheLoop->getLocStr() << "\n");
2474
2475
2477 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2478 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2479 return false;
2480 }
2481
2482
2485 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2486 recordAnalysis("CFGNotUnderstood")
2487 << "loop control flow is not understood by analyzer";
2488 return false;
2489 }
2490
2491
2492
2493
2496 recordAnalysis("CantComputeNumberOfIterations")
2497 << "could not determine number of loop iterations";
2498 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2499 return false;
2500 }
2501
  LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
                    << TheLoop->getHeader()->getName() << '\n');
2505}
2506
2507bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
2508 const TargetLibraryInfo *TLI,
2509 DominatorTree *DT) {
2510
2513 SmallPtrSet<MDNode *, 8> LoopAliasScopes;
2514
2515
2516 unsigned NumReads = 0;
2517 unsigned NumReadWrites = 0;
2518
2519 bool HasComplexMemInst = false;
2520
2521
2522 HasConvergentOp = false;
2523
2524 PtrRtChecking->Pointers.clear();
2525 PtrRtChecking->Need = false;
2526
2528
2529 const bool EnableMemAccessVersioningOfLoop =
2532
2533
2534
2535 LoopBlocksRPO RPOT(TheLoop);
2536 RPOT.perform(LI);
2537 for (BasicBlock *BB : RPOT) {
2538
2539
2540 for (Instruction &I : *BB) {
2543 HasConvergentOp = true;
2544 }
2545
2546
2547
2548 if (HasComplexMemInst && HasConvergentOp)
2549 return false;
2550
2551
2552 if (HasComplexMemInst)
2553 continue;
2554
2555
2557 for (Metadata *Op : Decl->getScopeList()->operands())
2559
2560
2561
2562
2565 continue;
2566
2567
2568
2569
2570 if (I.mayReadFromMemory()) {
2571 auto hasPointerArgs = [](CallBase *CB) {
2572 return any_of(CB->args(), [](Value const *Arg) {
2573 return Arg->getType()->isPointerTy();
2574 });
2575 };
2576
2577
2578
2579
2582 continue;
2583
        auto *Ld = dyn_cast<LoadInst>(&I);
        if (!Ld) {
          recordAnalysis("CantVectorizeInstruction", Ld)
              << "instruction cannot be vectorized";
          HasComplexMemInst = true;
          continue;
        }
2591 if (!Ld->isSimple() && !IsAnnotatedParallel) {
2592 recordAnalysis("NonSimpleLoad", Ld)
2593 << "read with atomic ordering or volatile read";
2594 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2595 HasComplexMemInst = true;
2596 continue;
2597 }
2598 NumLoads++;
2601 if (EnableMemAccessVersioningOfLoop)
2602 collectStridedAccess(Ld);
2603 continue;
2604 }
2605
2606
      // Save 'store' instructions. Abort if other instructions write to
      // memory.
      if (I.mayWriteToMemory()) {
        auto *St = dyn_cast<StoreInst>(&I);
        if (!St) {
          recordAnalysis("CantVectorizeInstruction", St)
              << "instruction cannot be vectorized";
          HasComplexMemInst = true;
          continue;
        }
2615 if (!St->isSimple() && !IsAnnotatedParallel) {
2616 recordAnalysis("NonSimpleStore", St)
2617 << "write with atomic ordering or volatile write";
2618 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2619 HasComplexMemInst = true;
2620 continue;
2621 }
2622 NumStores++;
2625 if (EnableMemAccessVersioningOfLoop)
2626 collectStridedAccess(St);
2627 }
2628 }
2629 }
2630
2631 if (HasComplexMemInst)
2632 return false;
2633
2634
2635
2636
2637
2638
2639 if (!Stores.size()) {
2640 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2641 return true;
2642 }
2643
2645 AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,
2646 LoopAliasScopes);
2647
2648
2649
2650
2651
2652
2653 SmallSet<std::pair<Value *, Type *>, 16> Seen;
2654
2655
2656
2657 SmallPtrSet<Value *, 16> UniformStores;
2658
2659 for (StoreInst *ST : Stores) {
2660 Value *Ptr = ST->getPointerOperand();
2661
2662 if (isInvariant(Ptr)) {
2663
2664 StoresToInvariantAddresses.push_back(ST);
2665 HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
2666 !UniformStores.insert(Ptr).second;
2667 }
2668
2669
2670
2672 if (Seen.insert({Ptr, AccessTy}).second) {
2673 ++NumReadWrites;
2674
2676
2677
2678
2679 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2681
2683 [&Accesses, AccessTy, Loc](Value *Ptr) {
2684 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2685 Accesses.addStore(NewLoc, AccessTy);
2686 });
2687 }
2688 }
2689
2690 if (IsAnnotatedParallel) {
2692 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2693 << "checks.\n");
2694 return true;
2695 }
2696
2697 for (LoadInst *LD : Loads) {
2698 Value *Ptr = LD->getPointerOperand();
2699
2700
2701
2702
2703
2704
2705
2706
    bool IsReadOnlyPtr = false;
    Type *AccessTy = getLoadStoreType(LD);
    if (Seen.insert({Ptr, AccessTy}).second ||
        !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,
                      true)) {
      ++NumReads;
      IsReadOnlyPtr = true;
    }
2715
2716
2717
2718 if (UniformStores.contains(Ptr)) {
2719 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2720 "load and uniform store to the same address!\n");
2721 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2722 }
2723
2725
2726
2727
2728 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2730
2732 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2733 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2734 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2735 });
2736 }
2737
2738
2739
2740 if (NumReadWrites == 1 && NumReads == 0) {
2741 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2742 return true;
2743 }
2744
2745
2746
2747 Accesses.buildDependenceSets();
2748
2749
2750
2751 Value *UncomputablePtr = nullptr;
2752 HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
2753 *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);
  if (!HasCompletePtrRtChecking) {
    auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
    recordAnalysis("CantIdentifyArrayBounds", I)
        << "cannot identify array bounds";
2758 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2759 << "the array bounds.\n");
2760 return false;
2761 }
2762
2764 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2765
2766 bool DepsAreSafe = true;
2767 if (Accesses.isDependencyCheckNeeded()) {
2768 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2769 DepsAreSafe =
2770 DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());

    if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
      LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2774
2775
2776 Accesses.resetDepChecks(*DepChecker);
2777
2778 PtrRtChecking->reset();
2779 PtrRtChecking->Need = true;
2780
2781 UncomputablePtr = nullptr;
2782 HasCompletePtrRtChecking =
2783 Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
2784 UncomputablePtr, AllowPartial);
2785
2786
      if (!HasCompletePtrRtChecking) {
        auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
        recordAnalysis("CantCheckMemDepsAtRunTime", I)
            << "cannot check memory dependencies at runtime";
2791 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2792 return false;
2793 }
2794 DepsAreSafe = true;
2795 }
2796 }
2797
2798 if (HasConvergentOp) {
2799 recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2800 << "cannot add control dependency to convergent operation";
2801 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2802 "would be needed with a convergent operation\n");
2803 return false;
2804 }
2805
2806 if (DepsAreSafe) {
2808 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2809 << (PtrRtChecking->Need ? "" : " don't")
2810 << " need runtime memory checks.\n");
2811 return true;
2812 }
2813
2814 emitUnsafeDependenceRemark();
2815 return false;
2816}
2817
2818void LoopAccessInfo::emitUnsafeDependenceRemark() {
2819 const auto *Deps = getDepChecker().getDependences();
2820 if (!Deps)
2821 return;
2822 const auto *Found =
2823 llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2826 });
2827 if (Found == Deps->end())
2828 return;
2829 MemoryDepChecker::Dependence Dep = *Found;
2830
2831 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2832
2833
2834 bool HasForcedDistribution = false;
  std::optional<const MDOperand *> Value =
      findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
  if (Value) {
    const MDOperand *Op = *Value;
    assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
    HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
  }
2842
2843 const std::string Info =
2844 HasForcedDistribution
2845 ? "unsafe dependent memory operations in loop."
2846 : "unsafe dependent memory operations in loop. Use "
2847 "#pragma clang loop distribute(enable) to allow loop distribution "
2848 "to attempt to isolate the offending operations into a separate "
2849 "loop";
2850 OptimizationRemarkAnalysis &R =
2851 recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;
2852
  switch (Dep.Type) {
  case MemoryDepChecker::Dependence::NoDep:
  case MemoryDepChecker::Dependence::Forward:
  case MemoryDepChecker::Dependence::BackwardVectorizable:
    llvm_unreachable("Unexpected dependence");
  case MemoryDepChecker::Dependence::Backward:
    R << "\nBackward loop carried data dependence.";
    break;
  case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
    R << "\nForward loop carried data dependence that prevents "
         "store-to-load forwarding.";
    break;
  case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
    R << "\nBackward loop carried data dependence that prevents "
         "store-to-load forwarding.";
    break;
  case MemoryDepChecker::Dependence::IndirectUnsafe:
    R << "\nUnsafe indirect dependence.";
    break;
  case MemoryDepChecker::Dependence::Unknown:
    R << "\nUnknown data dependence.";
    break;
  }
2876
  if (Instruction *I = Dep.getSource(getDepChecker())) {
    DebugLoc SourceLoc = I->getDebugLoc();
    if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
      SourceLoc = DD->getDebugLoc();
    if (SourceLoc)
      R << " Memory location is the same as accessed at "
        << ore::NV("Location", SourceLoc);
  }
}

bool LoopAccessInfo::blockNeedsPredication(const BasicBlock *BB,
                                           const Loop *TheLoop,
                                           const DominatorTree *DT) {
  assert(TheLoop->contains(BB) && "Unknown block used");

  // Blocks that do not dominate the latch need predication.
  const BasicBlock *Latch = TheLoop->getLoopLatch();
  return !DT->dominates(BB, Latch);
}

OptimizationRemarkAnalysis &
LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
  assert(!Report && "Multiple reports generated");

  const Value *CodeRegion = TheLoop->getHeader();
  DebugLoc DL = TheLoop->getStartLoc();

  if (I) {
    CodeRegion = I->getParent();
    // If there is no debug location attached to the instruction, revert back
    // to using the loop's.
    if (I->getDebugLoc())
      DL = I->getDebugLoc();
  }

  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
                                                        DL, CodeRegion);
  return *Report;
}

bool LoopAccessInfo::isInvariant(Value *V) const {
  auto *SE = PSE->getSE();
  if (TheLoop->isLoopInvariant(V))
    return true;
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);
  return SE->isLoopInvariant(S, TheLoop);
}

/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
/// Otherwise, return \p Ptr.
static Value *getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE,
                                       Loop *Lp) {
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  Value *V = Ptr;
  for (const Use &U : GEP->operands()) {
    if (!SE->isLoopInvariant(SE->getSCEV(U), Lp)) {
      if (V == Ptr)
        V = U;
      else
        // There must be exactly one loop-variant operand.
        return Ptr;
    }
  }
  return V;
}

/// Get the stride of a pointer access in a loop. Looks for symbolic strides
/// "a[i*stride]". Returns the symbolic stride, or null otherwise.
static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
                                        Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy)
    return nullptr;
2954
2955
2956
2957
2958 Value *OrigPtr = Ptr;
2959
2962
2963 if (Ptr != OrigPtr)
2964
2966 V = C->getOperand();
2967
2969 return nullptr;
2970
2971
2972
2974 return nullptr;
2975
2976
2978 return V;
2979
2982 return V;
2983
2984 return nullptr;
2985}
2986
void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
  Value *Ptr = getLoadStorePointerOperand(MemAccess);
  if (!Ptr)
    return;

  // Note: getStrideFromPointer is a *profitability* heuristic. We could
  // broaden the scope of values returned here - to anything which happens to
  // be loop invariant and contributes to the computation of an interesting
  // IV - but we chose not to, as it is understood to be a heuristic.
  const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
  if (!StrideExpr)
    return;
3001
3003 return;
3004
3005 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
3006 "versioning:");
3007 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");

  if (!SpeculateUnitStride) {
    LLVM_DEBUG(dbgs() << "  Chose not to due to -laa-speculate-unit-stride\n");
    return;
  }
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
3028
3029
3030
3031
3033 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
3034 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
3035 const SCEV *CastedStride = StrideExpr;
3036 const SCEV *CastedBECount = MaxBTC;
3037 ScalarEvolution *SE = PSE->getSE();
  if (BETypeSizeBits >= StrideTypeSizeBits)
    CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
  else
    CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);

  // Since TripCount == BackedgeTakenCount + 1, checking "Stride >= TripCount"
  // is equivalent to checking "Stride - MaxBTC > 0".
  if (SE->isKnownPositive(StrideMinusBETaken)) {
    LLVM_DEBUG(
        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
                  "Stride==1 predicate will imply that the loop executes "
                  "at most once.\n");
    return;
  }
3053 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
3054
  // Strip back off the integer cast if present.
  const SCEV *StrideBase = StrideExpr;
  if (const auto *C = dyn_cast<SCEVIntToPtrExpr>(StrideBase))
    StrideBase = C->getOperand();
  SymbolicStrides[Ptr] = StrideBase;
}
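
// Editor's illustrative sketch (not part of the original source): the
// versioning guard above in plain integers. Versioning on Stride == 1 only
// pays off when the loop can run more than once under that assumption; if
// Stride >= TripCount (i.e. Stride - MaxBTC > 0), the versioned loop would
// execute at most one iteration.

#include <cstdint>

bool worthVersioningStride(int64_t Stride, int64_t MaxBTC) {
  return Stride - MaxBTC <= 0; // i.e. Stride < TripCount == MaxBTC + 1
}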

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetTransformInfo *TTI,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI,
                               AssumptionCache *AC, bool AllowPartial)
    : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
      PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
  unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
  if (TTI && !TTI->enableScalableVectorization())
    // Scale the vector width by 2 as rough estimate to also consider
    // interleaving.
    MaxTargetVectorWidthInBits =
        TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;

  DepChecker = std::make_unique<MemoryDepChecker>(
      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
  PtrRtChecking =
      std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
  if (canAnalyzeLoop())
    CanVecMem = analyzeLoop(AA, LI, TLI, DT);
}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    OS.indent(Depth) << "Memory dependences are safe";
    const MemoryDepChecker &DC = getDepChecker();
    if (!DC.isSafeForAnyVectorWidth())
      OS << " with a maximum safe vector width of "
         << DC.getMaxSafeVectorWidthInBits() << " bits";
    if (uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
        !DC.isSafeForAnyStoreLoadForwardDistances()) {
      OS << ", with a maximum safe store-load forward width of " << SLDist
         << " bits";
    }
3097 if (PtrRtChecking->Need)
3098 OS << " with run-time checks";
3099 OS << "\n";
3100 }
3101
3102 if (HasConvergentOp)
3103 OS.indent(Depth) << "Has convergent operation in loop\n";
3104
3105 if (Report)
3106 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
3107
  if (auto *Dependences = DepChecker->getDependences()) {
    OS.indent(Depth) << "Dependences:\n";
    for (const auto &Dep : *Dependences) {
      Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
      OS << "\n";
    }
  } else
    OS.indent(Depth) << "Too many dependences, not recorded\n";
3116
3117
3118 PtrRtChecking->print(OS, Depth);
3119 if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
3120 OS.indent(Depth) << "Generated run-time checks are incomplete\n";
3121 OS << "\n";
3122
3124 << "Non vectorizable stores to invariant address were "
3125 << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3126 HasLoadStoreDependenceInvolvingLoopInvariantAddress
3127 ? ""
3128 : "not ")
3129 << "found in loop.\n";
3130
3131 OS.indent(Depth) << "SCEV assumptions:\n";
3132 PSE->getPredicate().print(OS, Depth);
3133
3134 OS << "\n";
3135
3136 OS.indent(Depth) << "Expressions re-written:\n";
3137 PSE->print(OS, Depth);
3138}

const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
                                                     bool AllowPartial) {
  const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);

  // We need to create the LoopAccessInfo if either we don't already have one,
  // or if it was created with a different value of AllowPartial.
  if (Inserted || It->second->hasAllowPartial() != AllowPartial)
    It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
                                                  &LI, AC, AllowPartial);

  return *It->second;
}

void LoopAccessInfoManager::clear() {
  // Collect LoopAccessInfo entries that may keep references to IR outside the
  // analyzed loop or SCEVs that may have been modified or invalidated. At the
  // moment, that is loops requiring memory or SCEV runtime checks, as those
  // cache SCEVs, e.g. for pointer expressions.
  SmallVector<Loop *> ToRemove;
  for (const auto &[L, LAI] : LoopAccessInfoMap) {
    if (LAI->getRuntimePointerChecking()->getChecks().empty() &&
        LAI->getPSE().getPredicate().isAlwaysTrue())
      continue;
    ToRemove.push_back(L);
  }
  for (Loop *L : ToRemove)
    LoopAccessInfoMap.erase(L);
}

bool LoopAccessInfoManager::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether our analysis is preserved.
  auto PAC = PA.getChecker<LoopAccessAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    // If not, give up now.
    return true;

  // Check whether the analyses we depend on became invalid for any reason.
  // Skip checking TargetLibraryAnalysis as it is immutable and can't become
  // invalid.
  return Inv.invalidate<AAManager>(F, PA) ||
         Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA);
}
3194
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &ScevList, unsigned Depth)
Definition LoopAccessAnalysis.cpp:1113
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
static const SCEV * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition LoopAccessAnalysis.cpp:2950
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize, ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Return true, if evaluating AR at MaxBTC cannot wrap, because AR at MaxBTC is guaranteed inbounds of t...
Definition LoopAccessAnalysis.cpp:213
static std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)
Try to compute a constant stride for AR.
Definition LoopAccessAnalysis.cpp:970
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static DenseMap< const RuntimeCheckingPtrGroup *, unsigned > getPtrToIdxMap(ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups)
Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
Definition LoopAccessAnalysis.cpp:754
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
Definition LoopAccessAnalysis.cpp:1074
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, Value *Ptr, Type *AccessTy, const Loop *L, bool Assume, const DominatorTree &DT, std::optional< int64_t > Stride=std::nullopt)
Check whether AR is a non-wrapping AddRec.
Definition LoopAccessAnalysis.cpp:1020
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, uint64_t MaxStride)
Given a dependence-distance Dist between two memory accesses, that have strides in the same direction...
Definition LoopAccessAnalysis.cpp:1926
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
Definition LoopAccessAnalysis.cpp:1981
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Definition LoopAccessAnalysis.cpp:558
static const SCEV * mulSCEVOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A * B, if it is guaranteed not to unsigned wrap.
Definition LoopAccessAnalysis.cpp:204
static Value * getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If Ptr is a GEP, which has a loop-variant operand, return that operand.
Definition LoopAccessAnalysis.cpp:2929
static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
static cl::opt< bool > SpeculateUnitStride("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the following.
static const SCEV * addSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A + B, if it is guaranteed not to unsigned wrap.
Definition LoopAccessAnalysis.cpp:195
This header provides classes for managing per-loop analyses.
This file provides utility analysis objects describing memory locations.
FunctionAnalysisManager FAM
This file defines the PointerIntPair class.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
A manager for alias analyses.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
APInt abs() const
Get the absolute value.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
int64_t getSExtValue() const
Get sign extended value.
This templated class represents "all analyses that operate over <a unit of IR>" (e.g. a Function or a Module).
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isConvergent() const
Determine if the invoke is convergent.
@ ICMP_UGE
unsigned greater or equal
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string, and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< member_iterator > members(const ECValue &ECV) const
bool contains(const ElemTy &V) const
Returns true if V is contained an equivalence class.
const ECValue & insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists.
member_iterator member_end() const
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
member_iterator findLeader(const ElemTy &V) const
findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not already exist.
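LAA's DepCandidates (listed below) is an EquivalenceClasses<MemAccessInfo>; its union-find interface behaves as in this toy sketch (int elements chosen only for brevity):

#include "llvm/ADT/EquivalenceClasses.h"
#include <cassert>
using namespace llvm;

void ecDemo() {
  EquivalenceClasses<int> EC;
  EC.insert(1);
  EC.insert(2);
  EC.insert(3);
  EC.unionSets(1, 2); // classes are now {1, 2} and {3}
  assert(EC.getLeaderValue(1) == EC.getLeaderValue(2));
  assert(EC.findLeader(3) != EC.member_end());
  assert(EC.contains(2));
}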
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
PointerType * getType() const
Global values are always pointers.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
Value * getPointerOperand()
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ABI Result run(Function &F, FunctionAnalysisManager &AM)
Definition LoopAccessAnalysis.cpp:3183
LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Definition LoopAccessAnalysis.cpp:3165
LLVM_ABI void clear()
Definition LoopAccessAnalysis.cpp:3152
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Definition LoopAccessAnalysis.cpp:3140
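Under the new pass manager, the analysis result is a LoopAccessInfoManager that hands out per-loop LoopAccessInfo objects lazily. A minimal sketch of a consumer, assuming a hypothetical pass named QueryLAAPass:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct QueryLAAPass : PassInfoMixin<QueryLAAPass> { // hypothetical pass
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    LoopAccessInfoManager &LAIs = FAM.getResult<LoopAccessAnalysis>(F);
    for (Loop *L : FAM.getResult<LoopAnalysis>(F)) {
      const LoopAccessInfo &LAI = LAIs.getInfo(*L); // analyzed on first use
      (void)LAI; // query LAI here
    }
    return PreservedAnalyses::all(); // read-only pass
  }
};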
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
Return the Memory Dependence Checker, which can determine the loop-independent and loop-carried dependences between memory accesses.
LLVM_ABI bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
Definition LoopAccessAnalysis.cpp:2917
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
Definition LoopAccessAnalysis.cpp:3085
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition LoopAccessAnalysis.cpp:2887
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial=false)
Definition LoopAccessAnalysis.cpp:3063
Analysis pass that exposes the LoopInfo for a function.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Represents a single loop in the control flow graph.
std::string getLocStr() const
Return a string containing the debug location of the loop (file name + line number if present,...
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
ArrayRef< MDOperand > operands() const
Checks memory dependences among accesses to the same underlying object to determine whether vectorization is legal or not (and at which vectorization factor).
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool isSafeForAnyStoreLoadForwardDistances() const
Return true if there are no store-load forwarding dependencies.
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, const MemAccessInfoList &CheckDeps)
Check whether the dependencies between the accesses are safe, and records the dependence information ...
Definition LoopAccessAnalysis.cpp:2365
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
bool shouldRetryWithRuntimeChecks() const
In some cases when the dependency check fails we can still vectorize the loop with a dynamic array access check.
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
SmallVector< MemAccessInfo, 8 > MemAccessInfoList
LLVM_ABI SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
Definition LoopAccessAnalysis.cpp:2439
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
@ PossiblySafeWithRtChecks
LLVM_ABI void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
Definition LoopAccessAnalysis.cpp:1781
PointerIntPair< Value *, 1, bool > MemAccessInfo
uint64_t getStoreLoadForwardSafeDistanceInBits() const
Return safe power-of-2 number of elements, which do not prevent store-load forwarding, multiplied by the size of the elements in bits.
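A typical consumer reads these dependence results off a computed LoopAccessInfo roughly as follows (a sketch; LAI is assumed to come from LoopAccessInfoManager::getInfo as above):

#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;

void inspectDeps(const LoopAccessInfo &LAI) {
  const MemoryDepChecker &DepChecker = LAI.getDepChecker();
  if (!DepChecker.isSafeForVectorization())
    return; // a dependence inhibits vectorization outright
  if (!DepChecker.isSafeForAnyVectorWidth()) {
    // Dependences cap the usable vector width.
    uint64_t MaxBits = DepChecker.getMaxSafeVectorWidthInBits();
    (void)MaxBits;
  }
}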
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
AAMDNodes AATags
The metadata nodes which describe the aliasing of the location (each member is null if that kind of metadata is unavailable).
const Value * Ptr
The address of the start of the location.
PointerIntPair - This class implements a pair of a pointer and a small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
LLVM_ABI void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
LLVM_ABI void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
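The predicated interface lets a client trade runtime predicates for stronger SCEV facts. A sketch of the usual pattern (function name is mine; note that getAsAddRec may itself add predicates):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

void predicateDemo(ScalarEvolution &SE, Loop &L, Value *Ptr) {
  PredicatedScalarEvolution PSE(SE, L);
  // getAsAddRec may add predicates (e.g. wrap predicates) to succeed.
  if (const SCEVAddRecExpr *AR = PSE.getAsAddRec(Ptr)) {
    // Additionally require that the increment does not signed-wrap.
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNSSW);
    (void)AR;
  }
}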
A set of analyses that are preserved following a run of a transformation pass.
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Holds information about the memory runtime legality checks to verify that a group of pointers do not overlap.
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition LoopAccessAnalysis.cpp:761
LLVM_ABI bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition LoopAccessAnalysis.cpp:547
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
Definition LoopAccessAnalysis.cpp:780
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
Definition LoopAccessAnalysis.cpp:540
friend struct RuntimeCheckingPtrGroup
static LLVM_ABI bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition LoopAccessAnalysis.cpp:729
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
LLVM_ABI void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
Definition LoopAccessAnalysis.cpp:401
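Once LAA has run, the generated checks can be inspected through LoopAccessInfo. A sketch, assuming LoopAccessInfo's getRuntimePointerChecking() accessor (not listed above):

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void inspectRtChecks(const LoopAccessInfo &LAI) {
  const RuntimePointerChecking *RtChecking = LAI.getRuntimePointerChecking();
  if (!RtChecking->Need)
    return; // no memory versioning required
  // Each check is a comparison the vectorizer must emit at runtime.
  unsigned NumChecks = RtChecking->getNumberOfChecks();
  (void)NumChecks;
  RtChecking->print(errs());
}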
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be preserved.
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Determine whether operation BinOp between LHS and RHS provably does not have a signed/unsigned overflow (Signed).
LLVM_ABI const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type.
LLVM_ABI const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
APInt getSignedRangeMin(const SCEV *S)
Determine the min of the signed range for a particular SCEV.
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a single pointer operand.
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)
Return an expression for a TypeSize.
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and return the result as an APInt if it is a constant, and std::nullopt if it isn't.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS, and RHS.
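The ScalarEvolution queries above compose in the way LAA uses them for dependence distances; a minimal sketch (function name is mine):

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Prove Sink - Src >= 0, in the style of LAA's distance reasoning.
bool distanceIsNonNegative(ScalarEvolution &SE, Value *Src, Value *Sink) {
  const SCEV *Dist = SE.getMinusSCEV(SE.getSCEV(Sink), SE.getSCEV(Src));
  return SE.isKnownNonNegative(Dist);
}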
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool canBeFreed() const
Return true if the memory object referred to by V can be freed in the scope for which the SSA value defining the allocation is statically defined.
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
is_undef_or_poison m_scev_UndefOrPoison()
Match an SCEVUnknown wrapping undef or poison.
class_match< const SCEVConstant > m_SCEVConstant()
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
specificscev_ty m_scev_Specific(const SCEV *S)
Match if we have a specific specified SCEV.
class_match< const SCEV > m_SCEV()
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, bool > hasa(Y &&MD)
Check whether Metadata has a Value.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI bool willNotFreeBetween(const Instruction *Assume, const Instruction *CtxI)
Returns true if no instruction between Assume and CtxI may free memory and the function is marked as nosync.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI RetainedKnowledge getKnowledgeForValue(const Value *V, ArrayRef< Attribute::AttrKind > AttrKinds, AssumptionCache &AC, function_ref< bool(RetainedKnowledge, Instruction *, const CallBase::BundleOpInfo *)> Filter=[](auto...) { return true;})
Return a valid Knowledge associated to the Value V if its Attribute kind is in AttrKinds and it matches the Filter.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto dyn_cast_or_null(const Y &Val)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< int64_t > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to calculate the distance between them.
Definition LoopAccessAnalysis.cpp:1657
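A sketch of calling getPointersDiff directly, with the element type supplied by the caller (all names here are mine):

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include <optional>
using namespace llvm;

// Distance between two accesses, in units of the element type ElemTy.
std::optional<int64_t> elemDistance(Value *PtrA, Value *PtrB, Type *ElemTy,
                                    const DataLayout &DL,
                                    ScalarEvolution &SE) {
  return getPointersDiff(ElemTy, PtrA, ElemTy, PtrB, DL, SE,
                         /*StrictCheck=*/false, /*CheckType=*/true);
}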
LLVM_ABI bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
Definition LoopAccessAnalysis.cpp:1726
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type arguments.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition LoopAccessAnalysis.cpp:155
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition LoopAccessAnalysis.cpp:1767
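isConsecutiveAccess is the usual adjacency test in SLP-style code. A sketch for two loads (helper name is mine; the DataLayout is taken from the containing block, cf. BasicBlock::getDataLayout above):

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Adjacency test for merging two loads into one wide load.
bool areAdjacentLoads(LoadInst *A, LoadInst *B, ScalarEvolution &SE) {
  const DataLayout &DL = A->getParent()->getDataLayout();
  return isConsecutiveAccess(A, B, DL, SE, /*CheckType=*/true);
}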
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instructions.
LLVM_ABI std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, Type * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Calculate Start and End points of memory access using exact backedge taken count BTC if computable, or maximum backedge taken count MaxBTC otherwise.
Definition LoopAccessAnalysis.cpp:318
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition LoopAccessAnalysis.cpp:1623
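A sketch of a unit-stride query via getPtrStride, using the defaults for the strides map, Assume, and ShouldCheckWrap (helper name is mine):

#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;

// Unit-stride test; the stride is returned in units of the access type size.
bool isUnitStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
                  const Loop *L, const DominatorTree &DT) {
  std::optional<int64_t> Stride = getPtrStride(PSE, AccessTy, Ptr, L, DT);
  return Stride && *Stride == 1;
}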
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
IR Values for the lower and upper bounds of a pointer evolution.
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * TBAA
The tag for type-based alias analysis.
MDNode * NoAlias
The tag specifying the noalias scope.
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
LLVM_ABI bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Definition LoopAccessAnalysis.cpp:1835
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
LLVM_ABI bool isForward() const
Lexically forward dependence.
Definition LoopAccessAnalysis.cpp:1839
LLVM_ABI bool isBackward() const
Lexically backward dependence.
Definition LoopAccessAnalysis.cpp:1818
LLVM_ABI void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Definition LoopAccessAnalysis.cpp:2461
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
@ BackwardVectorizableButPreventsForwarding
@ ForwardButPreventsForwarding
static LLVM_ABI const char * DepName[]
String version of the types.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Definition LoopAccessAnalysis.cpp:1800
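Recorded Dependence entries can be walked from the dependence checker; a sketch assuming MemoryDepChecker::getDependences(), which I believe returns null when recording was abandoned (accessor not listed above):

#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;

void walkDependences(const LoopAccessInfo &LAI) {
  const MemoryDepChecker &DepChecker = LAI.getDepChecker();
  const auto *Deps = DepChecker.getDependences();
  if (!Deps)
    return; // too many dependences; recording stopped
  for (const MemoryDepChecker::Dependence &D : *Deps)
    if (D.isBackward()) {
      Instruction *Src = D.getSource(DepChecker);
      Instruction *Dst = D.getDestination(DepChecker);
      (void)Src; (void)Dst;
    }
}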
Represent one information held inside an operand bundle of an llvm.assume.
unsigned AddressSpace
Address space of the involved pointers.
LLVM_ABI bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
Definition LoopAccessAnalysis.cpp:566
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
LLVM_ABI RuntimeCheckingPtrGroup(unsigned Index, const RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
Definition LoopAccessAnalysis.cpp:183
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI unsigned VectorizationFactor
VF as overridden by the user.
static LLVM_ABI unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime do not make more than this number of comparisons.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition LoopAccessAnalysis.cpp:151
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static LLVM_ABI bool HoistRuntimeChecks
Function object to check whether the first component of a container supported by std::get (like std::pair and std::tuple) compares less than the first component of another container.