LLVM: lib/Transforms/Vectorize/LoopVectorizationLegality.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
32
33using namespace llvm;
35
36#define LV_NAME "loop-vectorize"
37#define DEBUG_TYPE LV_NAME
38
41 cl::desc("Enable if-conversion during vectorization."));
42
45 cl::desc("Enable recognition of non-constant strided "
46 "pointer induction variables."));
47
50 cl::desc("Allow enabling loop hints to reorder "
51 "FP operations during vectorization."));
52
53
54
57 cl::desc("The maximum number of SCEV checks allowed."));
58
61 cl::desc("The maximum number of SCEV checks allowed with a "
62 "vectorize(enable) pragma"));
63
68 cl::desc("Control whether the compiler can use scalable vectors to "
69 "vectorize a loop"),
72 "Scalable vectorization is disabled."),
75 "Scalable vectorization is available and favored when the "
76 "cost is inconclusive."),
79 "Scalable vectorization is available and favored when the "
80 "cost is inconclusive.")));
81
84 cl::desc("Enables autovectorization of some loops containing histograms"));
85
86
88
89namespace llvm {
90
/// Returns whether \p Val is an acceptable value for this hint, selected by
/// the hint's Kind.
///
/// HK_ISVECTORIZED, HK_PREDICATE and HK_SCALABLE accept only 0 or 1. A Kind
/// that matches no case yields false.
// NOTE(review): the embedded line numbers skip 94 and 96, so the return
// statements for HK_WIDTH and HK_INTERLEAVE are not visible in this listing;
// as shown they fall through to the HK_FORCE check. Confirm against upstream.
91bool LoopVectorizeHints::Hint::validate(unsigned Val) {
92 switch (Kind) {
93 case HK_WIDTH:
95 case HK_INTERLEAVE:
97 case HK_FORCE:
98 return (Val <= 1);
99 case HK_ISVECTORIZED:
100 case HK_PREDICATE:
101 case HK_SCALABLE:
102 return (Val == 0 || Val == 1);
103 }
104 return false;
105}
106
108 bool InterleaveOnlyWhenForced,
112 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
113 Force("vectorize.enable", FK_Undefined, HK_FORCE),
114 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
115 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
116 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
117 TheLoop(L), ORE(ORE) {
118
119 getHintsFromMetadata();
120
121
124
125
126
127
128
129
130
135
136 if (Width.Value)
137
138
139
141 }
142
143
144
148
149
152
153 if (IsVectorized.Value != 1)
154
155
156
157 IsVectorized.Value =
160 << "LV: Interleaving disabled by the pass manager\n");
161}
162
164 LLVMContext &Context = TheLoop->getHeader()->getContext();
165
167 Context,
168 {MDString::get(Context, "llvm.loop.isvectorized"),
170 MDNode *LoopID = TheLoop->getLoopID();
173 {Twine(Prefix(), "vectorize.").str(),
174 Twine(Prefix(), "interleave.").str()},
175 {IsVectorizedMD});
176 TheLoop->setLoopID(NewLoopID);
177
178
179 IsVectorized.Value = 1;
180}
181
/// Reports that loop \p L is not being vectorized.
///
/// Prints "LV: Not vectorizing: <DebugMsg>." to the debug stream and streams
/// "loop not vectorized: <RemarkMsg>" into an object that is built from, among
/// other arguments, the header block of \p L.
// NOTE(review): the embedded line numbers skip 183 and 186, so the leading
// parameters (including DebugMsg) and the start of the remark expression are
// not visible in this listing.
182void LoopVectorizeHints::reportDisallowedVectorization(
184 const StringRef RemarkMsg, const Loop *L) const {
185 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
187 L->getHeader())
188 << "loop not vectorized: " << RemarkMsg);
189}
190
192 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
195 reportDisallowedVectorization("#pragma vectorize disable",
196 "MissedExplicitlyDisabled",
197 "vectorization is explicitly disabled", L);
199 reportDisallowedVectorization("loop hasDisableAllTransformsHint",
200 "MissedTransformsDisabled",
201 "loop transformations are disabled", L);
202 } else {
203 llvm_unreachable("loop vect disabled for an unknown reason");
204 }
205 return false;
206 }
207
209 reportDisallowedVectorization(
210 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
211 "MissedForceOnly", "only vectorizing loops that explicitly request it",
212 L);
213 return false;
214 }
215
217 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
218
219
220
221 ORE.emit([&]() {
223 "AllDisabled", L->getStartLoc(),
224 L->getHeader())
225 << "loop not vectorized: vectorization and interleaving are "
226 "explicitly disabled, or the loop has already been "
227 "vectorized";
228 });
229 return false;
230 }
231
232 return true;
233}
234
236 using namespace ore;
237
238 ORE.emit([&]() {
241 TheLoop->getStartLoc(),
242 TheLoop->getHeader())
243 << "loop not vectorized: vectorization is explicitly disabled";
244
246 TheLoop->getHeader());
247 R << "loop not vectorized";
249 R << " (Force=" << NV("Force", true);
250 if (Width.Value != 0)
251 R << ", Vector Width=" << NV("VectorWidth", getWidth());
253 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
254 R << ")";
255 }
256 return R;
257 });
258}
259
269
271
272
276 EC.getKnownMinValue() > 1);
277}
278
/// Reads hints out of the loop ID metadata and forwards them to setHint().
///
/// Does nothing when LoopID is null. Entries for which MD is null or has no
/// operands, or for which S is null, are skipped. setHint(Name, Args[0]) is
/// called only when exactly one argument was collected for the entry.
// NOTE(review): the embedded line numbers skip 280, 285, 288-290, 294, 297,
// 301 and 309, so the declarations of LoopID, MD, S, Args and Name and the
// loop header are not visible in this listing.
279void LoopVectorizeHints::getHintsFromMetadata() {
281 if (!LoopID)
282 return;
283
284
// The first operand of a loop ID is expected to be the loop ID node itself.
286 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
287
291
292
293
295 if (!MD || MD->getNumOperands() == 0)
296 continue;
// Operand 0 is skipped here; operands 1..N-1 are collected as arguments.
298 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
299 Args.push_back(MD->getOperand(Idx));
300 } else {
302 assert(Args.size() == 0 && "too many arguments for MDString");
303 }
304
305 if (!S)
306 continue;
307
308
// Only single-argument entries are treated as hints.
310 if (Args.size() == 1)
311 setHint(Name, Args[0]);
312 }
313}
314
316 if (.consume_front(Prefix()))
317 return;
318
320 if ()
321 return;
322 unsigned Val = C->getZExtValue();
323
324 Hint *Hints[] = {&Width, &Interleave, &Force,
325 &IsVectorized, &Predicate, &Scalable};
326 for (auto *H : Hints) {
327 if (Name == H->Name) {
328 if (H->validate(Val))
329 H->Value = Val;
330 else
331 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
332 break;
333 }
334 }
335}
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
362
363
364 if (Lp == OuterLp)
365 return true;
366 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
367
368
370 if () {
371 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
372 return false;
373 }
374
375
378 if (!LatchBr || LatchBr->isUnconditional()) {
379 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
380 return false;
381 }
382
383
385 if (!LatchCmp) {
387 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
388 return false;
389 }
390
391 Value *CondOp0 = LatchCmp->getOperand(0);
392 Value *CondOp1 = LatchCmp->getOperand(1);
393 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
394 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
395 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
396 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
397 return false;
398 }
399
400 return true;
401}
402
403
404
407 return false;
408
409
410 for (Loop *SubLp : *Lp)
412 return false;
413
414 return true;
415}
416
418 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
419
420 if (Ty->isPointerTy())
421 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
422
423
424
425 if (Ty->getScalarSizeInBits() < 32)
427
429}
430
437
438
439
442
443
444 if (!AllowedExit.count(Inst))
445
448
449 if (!TheLoop->contains(UI)) {
450 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
451 return true;
452 }
453 }
454 return false;
455}
456
457
460
462 return true;
463
464
465 Value *APtr = A->getPointerOperand();
466 Value *BPtr = B->getPointerOperand();
467 if (APtr == BPtr)
468 return true;
469
470
472}
473
475 Value *Ptr) const {
476
477
478
479
480 const auto &Strides =
482
483 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
484 AllowRuntimeSCEVChecks, false)
485 .value_or(0);
486 if (Stride == 1 || Stride == -1)
487 return Stride;
488 return 0;
489}
490
492 return LAI->isInvariant(V);
493}
494
495namespace {
496
497
498
499
500
501
/// SCEV rewriter whose visitAddRecExpr() rebuilds an add recurrence in TheLoop
/// with the step multiplied by StepMultiplier and the start advanced by
/// Step * Offset.
///
/// CannotAnalyze is set when a step is not invariant in TheLoop, when a
/// SCEVUnknown that is not invariant in TheLoop is visited, or when a
/// SCEVCouldNotCompute is visited. Once it is set, visit() returns its input
/// unchanged.
// NOTE(review): the embedded line numbers skip 503, 508, 520-521, 525,
// 530-531, 546, 570-572, 576 and 578-580. The base class list, the SE and
// Offset members, the declarations of Step and Ty, the tail of visit() and
// most of rewrite() are therefore not visible in this listing.
502class SCEVAddRecForUniformityRewriter
504
505 unsigned StepMultiplier;
506
507
509
510
511 Loop *TheLoop;
512
513
514 bool CannotAnalyze = false;
515
516 bool canAnalyze() const { return !CannotAnalyze; }
517
518public:
519 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
522 TheLoop(TheLoop) {}
523
524 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
526 "addrec outside of TheLoop must be invariant and should have been "
527 "handled earlier");
528
529
// A step that varies inside TheLoop cannot be rescaled; give up.
532 if (!SE.isLoopInvariant(Step, TheLoop)) {
533 CannotAnalyze = true;
534 return Expr;
535 }
// New recurrence: {Start + Step * Offset, +, Step * StepMultiplier}.
536 const SCEV *NewStep =
537 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
538 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
539 const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
540 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
541 }
542
543 const SCEV *visit(const SCEV *S) {
// Nothing to rewrite after a failure or for expressions invariant in TheLoop.
544 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
545 return S;
547 }
548
549 const SCEV *visitUnknown(const SCEVUnknown *S) {
550 if (SE.isLoopInvariant(S, TheLoop))
551 return S;
552
553 CannotAnalyze = true;
554 return S;
555 }
556
557 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
558
559 CannotAnalyze = true;
560 return S;
561 }
562
563 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
564 unsigned StepMultiplier, unsigned Offset,
565 Loop *TheLoop) {
566
567
568
569
573
574 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
575 TheLoop);
577
581 }
582};
583
584}
585
588 return true;
590 return false;
592 return true;
593
594
595
596 auto *SE = PSE.getSE();
598 return false;
600
601
602
604 const SCEV *FirstLaneExpr =
605 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
607 return false;
608
609
610
611
613 const SCEV *IthLaneExpr =
614 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
615 return FirstLaneExpr == IthLaneExpr;
616 });
617}
618
622 if (!Ptr)
623 return false;
624
625
626
627
629}
630
/// Runs the legality checks used when TheLoop is an outer loop (asserted not
/// to be innermost).
///
/// Each visible failing check reports "CFGNotUnderstood" or "UnsupportedPhi".
/// It then returns false immediately, unless DoExtraAnalysis is set, in which
/// case the failure is recorded and the remaining checks still run. The last
/// visible check is setupOuterLoopInductions().
// NOTE(review): the embedded line numbers skip several lines (e.g. 636, 641,
// 659-662, 680, 690, 695). The declarations of DoExtraAnalysis and Br, parts
// of the conditions, two of the "Result = false" arms and the final return
// are not visible in this listing.
631bool LoopVectorizationLegality::canVectorizeOuterLoop() {
632 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
633
634
635 bool Result = true;
637
639
640
642 if (!Br) {
644 "loop control flow is not understood by vectorizer",
645 "CFGNotUnderstood", ORE, TheLoop);
646 if (DoExtraAnalysis)
647 Result = false;
648 else
649 return false;
650 }
651
652
653
654
655
656
657
658 if (Br && Br->isConditional() &&
663 "loop control flow is not understood by vectorizer",
664 "CFGNotUnderstood", ORE, TheLoop);
665 if (DoExtraAnalysis)
666 Result = false;
667 else
668 return false;
669 }
670 }
671
672
673
675 TheLoop )) {
677 "loop control flow is not understood by vectorizer",
678 "CFGNotUnderstood", ORE, TheLoop);
679 if (DoExtraAnalysis)
681 else
682 return false;
683 }
684
685
686 if (!setupOuterLoopInductions()) {
688 "UnsupportedPhi", ORE, TheLoop);
689 if (DoExtraAnalysis)
691 else
692 return false;
693 }
694
696}
697
/// Records \p Phi as an induction variable of the loop.
///
/// Visible effects: when Casts is non-empty its first element is added to
/// InductionCastsToIgnore. Phi becomes PrimaryInduction when none is set yet
/// or when its type equals WidestIndTy (inside a guard that requires, among
/// other things, a constant integer step of one). When the PSE predicate is
/// always true, Phi and its incoming value from the loop latch are added to
/// AllowedExit.
// NOTE(review): the embedded line numbers skip 699-701, 707, 714, 718, 720,
// 722, 726 and 728-729. The parameter list, the declaration of Casts, the
// WidestIndTy update and part of the primary-induction condition are not
// visible in this listing.
698void LoopVectorizationLegality::addInductionPhi(
702
703
704
705
706
708 if (!Casts.empty())
709 InductionCastsToIgnore.insert(*Casts.begin());
710
711 Type *PhiTy = Phi->getType();
712 const DataLayout &DL = Phi->getDataLayout();
713
715 "Expected int, ptr, or FP induction phi type");
716
717
719 if (!WidestIndTy)
721 else
723 }
724
725
727 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
730
731
732
733
734
735 if (!PrimaryInduction || PhiTy == WidestIndTy)
736 PrimaryInduction = Phi;
737 }
738
739
740
741
742
743
744
// Uses outside the loop are only permitted when no runtime predicate is needed.
745 if (PSE.getPredicate().isAlwaysTrue()) {
746 AllowedExit.insert(Phi);
747 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
748 }
749
750 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
751}
752
/// Returns true when every PHI in the header of TheLoop is accepted by the
/// local IsSupportedPhi predicate.
///
/// An accepted PHI is registered through addInductionPhi(). A rejected one
/// prints "LV: Found unsupported PHI for outer loop vectorization." to the
/// debug stream and makes the whole check fail.
// NOTE(review): the embedded line numbers skip 759-760 and 766, so the
// condition that decides whether a PHI is supported is not visible in this
// listing.
753bool LoopVectorizationLegality::setupOuterLoopInductions() {
754 BasicBlock *Header = TheLoop->getHeader();
755
756
757 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
758 InductionDescriptor ID;
761 addInductionPhi(&Phi, ID, AllowedExit);
762 return true;
763 }
764
765
767 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
768 return false;
769 };
770
// all_of stops at the first unsupported PHI.
771 return llvm::all_of(Header->phis(), IsSupportedPhi);
772}
773
774
775
776
777
778
779
780
781
782
786
787
788 if (Scalarize) {
789 ElementCount WidestFixedVF, WidestScalableVF;
790 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
797 assert((WidestScalableVF.isZero() || !Scalarize) &&
798 "Caller may decide to scalarize a variant using a scalable VF");
799 }
800 return Scalarize;
801}
802
803
804
807
808
809
810 if (StructTy && !StructTy->containsHomogeneousTypes())
811 return false;
813}
814
/// Applies canVectorizeInstr() to the loop's instructions and then validates
/// the induction variables that were found.
///
/// The per-instruction results are AND-ed into Result, and the scan stops at
/// the first failure unless DoExtraAnalysis is set. If no PrimaryInduction was
/// found, the function returns false when Inductions is empty
/// ("NoInductionVariable") or WidestIndTy is null
/// ("NoIntegerInductionVariable"). Finally, a PrimaryInduction whose type
/// differs from WidestIndTy is reset to nullptr.
// NOTE(review): the embedded line numbers skip 816, 820, 822, 831, 838 and
// 853. The declaration of DoExtraAnalysis, the loop headers and the final
// return are not visible in this listing.
815bool LoopVectorizationLegality::canVectorizeInstrs() {
817 bool Result = true;
818
819
821
823 Result &= canVectorizeInstr(I);
824 if (!DoExtraAnalysis && !Result)
825 return false;
826 }
827 }
828
829 if (!PrimaryInduction) {
830 if (Inductions.empty()) {
832 "Did not find one integer induction var",
833 "loop induction variable could not be identified",
834 "NoInductionVariable", ORE, TheLoop);
835 return false;
836 }
837 if (!WidestIndTy) {
839 "Did not find one integer induction var",
840 "integer loop induction variable could not be identified",
841 "NoIntegerInductionVariable", ORE, TheLoop);
842 return false;
843 }
844 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
845 }
846
847
848
849
// Drop a primary induction that is not of the widest induction type.
850 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
851 PrimaryInduction = nullptr;
852
854}
855
/// Decides whether the single instruction \p I is legal to vectorize,
/// recording what it learns in the legality state.
///
/// Visible handling, in order:
///  - PHIs: a PHI outside the header is added to AllowedExit and accepted. A
///    header PHI must have exactly two incoming values, and is then recorded
///    in Reductions, passed to addInductionPhi(), or recorded in
///    FixedOrderRecurrences. Anything else is rejected with
///    "NonReductionValueUsedOutsideLoop".
///  - Calls: rejected with "CantVectorizeLibcall" (the message differs for
///    math library calls) or "CantVectorizeIntrinsic" when a checked operand
///    is not loop-invariant.
///  - Result type: rejected with "CantVectorizeInstructionReturnType".
///  - Stores/loads: rejected with "CantVectorizeStore", or with the
///    nontemporal remarks when TTI reports the nontemporal access as illegal.
///  - Floating-point calls/binary operators: may mark the hints as potentially
///    unsafe.
///  - Final guard: the instruction is added to AllowedExit when the PSE
///    predicate is always true, otherwise rejected with "ValueUsedOutsideLoop".
// NOTE(review): the embedded line numbers skip many lines in this function
// (e.g. 857, 860, 863-865, 895, 914, 935, 969, 1018, 1048, 1070, 1097). The
// declarations of BB, Phi, CI, ST, LD and VecTy and most of the guarding
// conditions are not visible in this listing, so the summary above covers
// only what the remaining lines show.
856bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
858 BasicBlock *Header = TheLoop->getHeader();
859
861 Type *PhiTy = Phi->getType();
862
866 "Found a non-int non-pointer PHI",
867 "loop control flow is not understood by vectorizer",
868 "CFGNotUnderstood", ORE, TheLoop);
869 return false;
870 }
871
872
873
874
// PHIs outside the loop header are accepted and allowed to be used outside.
875 if (BB != Header) {
876
877
878
879
880
881 AllowedExit.insert(&I);
882 return true;
883 }
884
885
886 if (Phi->getNumIncomingValues() != 2) {
888 "Found an invalid PHI",
889 "loop control flow is not understood by vectorizer",
890 "CFGNotUnderstood", ORE, TheLoop, Phi);
891 return false;
892 }
893
894 RecurrenceDescriptor RedDes;
896 PSE.getSE())) {
899 Reductions[Phi] = RedDes;
903 "Only min/max recurrences are allowed to have multiple uses "
904 "currently");
905 return true;
906 }
907
908
909
910
911
912 auto IsDisallowedStridedPointerInduction =
913 [](const InductionDescriptor &ID) {
915 return false;
917 ID.getConstIntStepValue() == nullptr;
918 };
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934 InductionDescriptor ID;
936 !IsDisallowedStridedPointerInduction(ID)) {
937 addInductionPhi(Phi, ID, AllowedExit);
938 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
939 return true;
940 }
941
943 AllowedExit.insert(Phi);
944 FixedOrderRecurrences.insert(Phi);
945 return true;
946 }
947
948
949
951 !IsDisallowedStridedPointerInduction(ID)) {
952 addInductionPhi(Phi, ID, AllowedExit);
953 return true;
954 }
955
957 "value that could not be identified as "
958 "reduction is used outside the loop",
959 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
960 Phi);
961 return false;
962 }
963
964
965
966
968
970 !(CI->getCalledFunction() && TLI &&
972
973
974 LibFunc Func;
// A call counts as a math library call when it returns a floating-point value
// and TLI recognises the callee as a library function with optimized codegen.
975 bool IsMathLibCall =
976 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
977 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
978 TLI->hasOptimizedCodeGen(Func);
979
980 if (IsMathLibCall) {
981
982
983
984
986 "Found a non-intrinsic callsite",
987 "library call cannot be vectorized. "
988 "Try compiling with -fno-math-errno, -ffast-math, "
989 "or similar flags",
990 "CantVectorizeLibcall", ORE, TheLoop, CI);
991 } else {
993 "call instruction cannot be vectorized",
994 "CantVectorizeLibcall", ORE, TheLoop, CI);
995 }
996 return false;
997 }
998
999
1000
1001 if (CI) {
1002 auto *SE = PSE.getSE();
1004 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
1006 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
1008 "Found unvectorizable intrinsic",
1009 "intrinsic instruction cannot be vectorized",
1010 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
1011 return false;
1012 }
1013 }
1014 }
1015
1016
1017
1019 VecCallVariantsFound = true;
1020
1021 auto CanWidenInstructionTy = [](Instruction const &Inst) {
1022 Type *InstTy = Inst.getType();
1025
1026
1027
1028
1031 };
1032
1033
1034
1035
1036 if (!CanWidenInstructionTy(I) ||
1041 "instruction return type cannot be vectorized",
1042 "CantVectorizeInstructionReturnType", ORE,
1043 TheLoop, &I);
1044 return false;
1045 }
1046
1047
1049 Type *T = ST->getValueOperand()->getType();
1052 "CantVectorizeStore", ORE, TheLoop, ST);
1053 return false;
1054 }
1055
1056
1057
// Nontemporal stores are only accepted when the target reports the vector
// form as legal at the store's alignment.
1058 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1059
1061 assert(VecTy && "did not find vectorized version of stored type");
1062 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
1064 "nontemporal store instruction cannot be vectorized",
1065 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1066 return false;
1067 }
1068 }
1069
// Same legality query for nontemporal loads.
1071 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1072
1073
1075 assert(VecTy && "did not find vectorized version of load type");
1076 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1078 "nontemporal load instruction cannot be vectorized",
1079 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1080 return false;
1081 }
1082 }
1083
1084
1085
1086
1087
1088
1089 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1090 .isFast()) {
1091 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1092 Hints->setPotentiallyUnsafe();
1093 }
1094
1095
1096
1098
1099
1100
1101
1102 if (PSE.getPredicate().isAlwaysTrue()) {
1103 AllowedExit.insert(&I);
1104 return true;
1105 }
1107 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1108 return false;
1109 }
1110
1111 return true;
1112}
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1130
1131
1135 return false;
1136
1137
1138
1139
1140
1141 Value *HIncVal = nullptr;
1144 return false;
1145
1146
1148 return false;
1149
1150
1152 if ()
1153 return false;
1154
1155
1156 Value *HIdx = nullptr;
1157 for (Value *Index : GEP->indices()) {
1158 if (HIdx)
1159 return false;
1161 HIdx = Index;
1162 }
1163
1164 if (!HIdx)
1165 return false;
1166
1167
1168
1169
1170
1171
1172
1173
1176 return false;
1177
1178
1180 if (!AR || AR->getLoop() != TheLoop)
1181 return false;
1182
1183
1184
1188 return false;
1189
1190 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1191
1192
1193 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1194 return true;
1195}
1196
/// Checks whether the loop's unsafe memory dependence can still be handled as
/// a histogram.
///
/// Returns false when Deps is null, when no candidate dependence (IUDep) was
/// selected, or when LI or SI is null. Otherwise the result of findHistogram()
/// on that load/store pair is returned.
// NOTE(review): the embedded line numbers skip 1200, 1204-1206, 1212,
// 1215-1216, 1222 and 1231-1232. The first guard, the declarations of Deps,
// IUDep, LI and SI and the dependence filter conditions are not visible in
// this listing.
1197bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1198
1199
1201 return false;
1202
1203
1207
1208
1209 if (!Deps)
1210 return false;
1211
1213
1214
1217 continue;
1218
1219
1220
1221
1223 return false;
1224
1225 IUDep = &Dep;
1226 }
1227 if (!IUDep)
1228 return false;
1229
1230
1233
1234 if (!LI || !SI)
1235 return false;
1236
1237 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1238 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1239}
1240
/// Checks the loop's memory accesses using the LoopAccessInfo for TheLoop.
///
/// LAI is (re)assigned from LAIs.getInfo(*TheLoop), and any report it carries
/// is re-emitted with a "loop not vectorized: " prefix. When
/// LAI->canVectorizeMemory() fails, the function either rejects the loop
/// ("CantVectorizeUnsafeDependencyForEELoopWithSideEffects") or defers to
/// canVectorizeIndirectUnsafeDependences(). Otherwise loads/stores involving
/// loop-invariant addresses are checked, each failure reporting
/// "CantVectorizeStoreToLoopInvariantAddress". On success the LAI predicate is
/// added to PSE and true is returned.
// NOTE(review): the embedded line numbers skip 1252-1253, 1265, 1281,
// 1284-1285, 1296, 1298, 1315, 1317, 1329, 1335 and 1341. Several conditions
// and the declarations of Ptr and UnhandledStores are not visible in this
// listing.
1241bool LoopVectorizationLegality::canVectorizeMemory() {
1242 LAI = &LAIs.getInfo(*TheLoop);
1243 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1244 if (LAR) {
1245 ORE->emit([&]() {
1246 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
1247 "loop not vectorized: ", *LAR);
1248 });
1249 }
1250
1251 if (!LAI->canVectorizeMemory()) {
1254 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1255 "side effects",
1256 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1257 TheLoop);
1258 return false;
1259 }
1260
1261 return canVectorizeIndirectUnsafeDependences();
1262 }
1263
1264 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1266 "write to a loop invariant address could not "
1267 "be vectorized",
1268 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1269 TheLoop);
1270 return false;
1271 }
1272
1273
1274
1275
1276
1277 if (!LAI->getStoresToInvariantAddresses().empty()) {
1278
1279
1280 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1282 continue;
1283
1286 "We don't allow storing to uniform addresses",
1287 "write of conditional recurring variant value to a loop "
1288 "invariant address could not be vectorized",
1289 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1290 return false;
1291 }
1292
1293
1294
1295
// An "invariant" address that is itself computed inside the loop is rejected.
1297 if (TheLoop->contains(Ptr)) {
1299 "Invariant address is calculated inside the loop",
1300 "write to a loop invariant address could not "
1301 "be vectorized",
1302 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1303 return false;
1304 }
1305 }
1306 }
1307
1308 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1309
1310
1311
1312
1313
1314 ScalarEvolution *SE = PSE.getSE();
1316 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1330 I->getValueOperand()->getType() ==
1331 SI->getValueOperand()->getType();
1332 });
1333 continue;
1334 }
1336 }
1337
// Every store to an invariant address must have been handled above.
1338 bool IsOK = UnhandledStores.empty();
1339
1340 if (!IsOK) {
1342 "We don't allow storing to uniform addresses",
1343 "write to a loop invariant address could not "
1344 "be vectorized",
1345 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1346 return false;
1347 }
1348 }
1349 }
1350
// Carry over the runtime predicate required by the access analysis.
1351 PSE.addPredicate(LAI->getPSE().getPredicate());
1352 return true;
1353}
1354
1356 bool EnableStrictReductions) {
1357
1358
1359 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1360 return true;
1361
1362
1363
1364
1365 if (!EnableStrictReductions ||
1369 }))
1370 return false;
1371
1372
1373
1374
1378 }));
1379}
1380
1387
1392 return false;
1393
1396 return V == InvariantAddress ||
1398 });
1399}
1400
1402 Value *In0 = const_cast<Value *>(V);
1404 if (!PN)
1405 return false;
1406
1407 return Inductions.count(PN);
1408}
1409
1413 return nullptr;
1417 return &ID;
1418 return nullptr;
1419}
1420
1424 return nullptr;
1427 return &ID;
1428 return nullptr;
1429}
1430
1432 const Value *V) const {
1434 return (Inst && InductionCastsToIgnore.count(Inst));
1435}
1436
1440
1442 const PHINode *Phi) const {
1443 return FixedOrderRecurrences.count(Phi);
1444}
1445
1448
1449
1450
1451 BasicBlock *Latch = TheLoop->getLoopLatch();
1455 "Uncountable exiting block must be a direct predecessor of latch");
1456 return BB == Latch;
1457 }
1459}
1460
/// Decides whether every instruction of a block can be executed under a mask,
/// collecting the instructions that need masking in MaskedOp.
///
/// Visible behaviour: some calls (CI) and stores (SI) are added to MaskedOp. A
/// load is added only when its pointer operand is not in SafePtrs. Any
/// instruction that reaches the end of the loop body and may read memory,
/// write memory or throw makes the function return false. Otherwise it
/// returns true.
// NOTE(review): the embedded line numbers skip 1462-1464, 1467-1468, 1475,
// 1482-1483, 1489 and 1500. The parameter list, the loop header and the
// conditions that classify each instruction are not visible in this listing.
1461bool LoopVectorizationLegality::blockCanBePredicated(
1465
1466
1469 continue;
1470 }
1471
1472
1473
1474
1476 continue;
1477
1478
1479
1480
1481
1484 MaskedOp.insert(CI);
1485 continue;
1486 }
1487
1488
// Loads through pointers known to be safe do not need a mask.
1490 if (!SafePtrs.count(LI->getPointerOperand()))
1491 MaskedOp.insert(LI);
1492 continue;
1493 }
1494
1495
1496
1497
1498
1499
1501 MaskedOp.insert(SI);
1502 continue;
1503 }
1504
// Anything else with memory or exception side effects cannot be predicated.
1505 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1506 return false;
1507 }
1508
1509 return true;
1510}
1511
/// Checks whether a multi-block loop can be if-converted.
///
/// Visible steps: an early rejection reporting "IfConversionDisabled", and an
/// assertion that the loop has more than one block. A first pass over the
/// blocks fills SafePointers; some pointers are inserted directly, and load
/// pointers are inserted when CanSpeculatePointerOp() and a further check
/// succeed. A second pass rejects blocks reported as
/// "LoopContainsUnsupportedSwitch" or "LoopContainsUnsupportedTerminator", and
/// blocks for which blockCanBePredicated() fails ("NoCFGForSelect"). Returns
/// true when no block is rejected.
// NOTE(review): the embedded line numbers skip many lines (e.g. 1513-1514,
// 1530, 1543, 1555, 1566, 1590, 1603, 1618). The guarding conditions and the
// declarations of Ptr, Predicates, Worklist, CurrV, CurrI and LI are not
// visible in this listing.
1512bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1515 "IfConversionDisabled", ORE, TheLoop);
1516 return false;
1517 }
1518
1519 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1520
1521
1522
1523
1524
1525
1526 SmallPtrSet<Value *, 8> SafePointers;
1527
1528
1529 for (BasicBlock *BB : TheLoop->blocks()) {
1531 for (Instruction &I : *BB)
1533 SafePointers.insert(Ptr);
1534 continue;
1535 }
1536
1537
1538
1539
1540
1541
1542 ScalarEvolution &SE = *PSE.getSE();
1544 for (Instruction &I : *BB) {
1546
1547
1548
1549
1550
1551
1552
1553
// Worklist walk over values reachable from Ptr; each value is visited once.
1554 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1556 SmallPtrSet<Value *, 4> Visited;
1557 while (!Worklist.empty()) {
1559 if (!Visited.insert(CurrV).second)
1560 continue;
1561
1563 if (!CurrI || !TheLoop->contains(CurrI)) {
1564
1565
1567 TheLoop->getLoopPredecessor()
1568 ->getTerminator()
1569 ->getIterator(),
1570 DT))
1571 return false;
1572 continue;
1573 }
1574
1575
1577 return false;
1578
1579
1580
1582 return false;
1584 }
1585 return true;
1586 };
1587
1588
1589
1591 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1593 &Predicates))
1594 SafePointers.insert(LI->getPointerOperand());
1595 Predicates.clear();
1596 }
1597 }
1598
1599
1600 for (BasicBlock *BB : TheLoop->blocks()) {
1601
1602
1604 if (TheLoop->isLoopExiting(BB)) {
1606 "LoopContainsUnsupportedSwitch", ORE,
1607 TheLoop, BB->getTerminator());
1608 return false;
1609 }
1612 "LoopContainsUnsupportedTerminator", ORE,
1613 TheLoop, BB->getTerminator());
1614 return false;
1615 }
1616
1617
1619 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1621 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1622 ORE, TheLoop, BB->getTerminator());
1623 return false;
1624 }
1625 }
1626
1627
1628 return true;
1629}
1630
1631
/// Checks the control-flow shape of the single loop \p Lp.
///
/// Three checks are visible, each reporting "CFGNotUnderstood" on failure; the
/// third one's message is "The loop latch terminator is not a BranchInst". A
/// failure returns false immediately unless ORE allows extra analysis for
/// DEBUG_TYPE.
// NOTE(review): the embedded line numbers skip 1634, 1644, 1649-1650, 1654,
// 1660-1661, 1665, 1671-1673, 1678 and 1683. The conditions being tested, the
// declaration of the result variable, the DoExtraAnalysis arms and the final
// return are not visible in this listing.
1632bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1633 bool UseVPlanNativePath) {
1635 "VPlan-native path is not enabled.");
1636
1637
1638
1639
1640
1641
1642
1643
1645 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1646
1647
1648
1651 "loop control flow is not understood by vectorizer",
1652 "CFGNotUnderstood", ORE, TheLoop);
1653 if (DoExtraAnalysis)
1655 else
1656 return false;
1657 }
1658
1659
1662 "loop control flow is not understood by vectorizer",
1663 "CFGNotUnderstood", ORE, TheLoop);
1664 if (DoExtraAnalysis)
1666 else
1667 return false;
1668 }
1669
1670
1674 "The loop latch terminator is not a BranchInst",
1675 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1676 ORE, TheLoop);
1677 if (DoExtraAnalysis)
1679 else
1680 return false;
1681 }
1682
1684}
1685
/// Applies canVectorizeLoopCFG() to \p Lp and then recurses into each of its
/// sub-loops.
///
/// A failing loop makes the function return false immediately unless ORE
/// allows extra analysis for DEBUG_TYPE.
// NOTE(review): the embedded line numbers skip 1690, 1694, 1704 and 1709. The
// declaration of the result variable, the DoExtraAnalysis arms and the final
// return are not visible in this listing.
1686bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1687 Loop *Lp, bool UseVPlanNativePath) {
1688
1689
1691 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1692 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1693 if (DoExtraAnalysis)
1695 else
1696 return false;
1697 }
1698
1699
1700
// Recurse into the directly nested loops.
1701 for (Loop *SubLp : *Lp)
1702 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1703 if (DoExtraAnalysis)
1705 else
1706 return false;
1707 }
1708
1710}
1711
/// Decides whether TheLoop is an early-exit loop that can be vectorized.
///
/// Visible requirements, in order:
///  - the loop has a latch block;
///  - no reductions or fixed-order recurrences were recorded;
///  - exactly one exiting block is classified as uncountable (other exiting
///    blocks go to CountableExitingBlocks);
///  - LatchPredBB is that uncountable exiting block;
///  - the predicated exit count of the latch is usable;
///  - no instruction is rejected as a complex memory write or as an unsafe
///    operation (loads, stores, PHIs and branches are always considered safe);
///  - without side effects, a further check over the loop's loads succeeds;
///    with side effects, canUncountableExitConditionLoadBeMoved() succeeds;
///  - every load in NonDerefLoads has a consecutive stride of exactly 1.
/// On success UncountableExitingBB and UncountableExitWithSideEffects are set
/// and true is returned.
// NOTE(review): the embedded line numbers skip many lines (e.g. 1715, 1733,
// 1738-1740, 1772, 1803, 1811, 1817, 1837, 1874). The declarations of
// Predicates, LatchPredBB and NonDerefLoads and several conditions are not
// visible in this listing. The only change from the listed text is the
// spelling fix "Cound" -> "Could" in the debug message below.
1712bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1713 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1714 if (!LatchBB) {
1716 "Cannot vectorize early exit loop",
1717 "NoLatchEarlyExit", ORE, TheLoop);
1718 return false;
1719 }
1720
1721 if (Reductions.size() || FixedOrderRecurrences.size()) {
1723 "Found reductions or recurrences in early-exit loop",
1724 "Cannot vectorize early exit loop with reductions or recurrences",
1725 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1726 return false;
1727 }
1728
1729 SmallVector<BasicBlock *, 8> ExitingBlocks;
1730 TheLoop->getExitingBlocks(ExitingBlocks);
1731
1732
1734 BasicBlock *SingleUncountableExitingBlock = nullptr;
1735 for (BasicBlock *BB : ExitingBlocks) {
1736 const SCEV *EC =
1737 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1741 "Early exiting block does not have exactly two successors",
1742 "Incorrect number of successors from early exiting block",
1743 "EarlyExitTooManySuccessors", ORE, TheLoop);
1744 return false;
1745 }
1746
// A second uncountable exit is not supported.
1747 if (SingleUncountableExitingBlock) {
1749 "Loop has too many uncountable exits",
1750 "Cannot vectorize early exit loop with more than one early exit",
1751 "TooManyUncountableEarlyExits", ORE, TheLoop);
1752 return false;
1753 }
1754
1755 SingleUncountableExitingBlock = BB;
1756 } else
1757 CountableExitingBlocks.push_back(BB);
1758 }
1759
1760
1761
1762
1763 Predicates.clear();
1764
1765 if (!SingleUncountableExitingBlock) {
1766 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1767 return false;
1768 }
1769
1770
1771
1773 if (LatchPredBB != SingleUncountableExitingBlock) {
1775 "Cannot vectorize early exit loop",
1776 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1777 return false;
1778 }
1779
1780
1782 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1784 "Cannot determine exact exit count for latch block",
1785 "Cannot vectorize early exit loop",
1786 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1787 return false;
1788 }
1790 "Latch block not found in list of countable exits!");
1791
1792
1793
// Loads, stores, PHIs and branches are always treated as safe here.
1794 auto IsSafeOperation = [](Instruction *I) -> bool {
1795 switch (I->getOpcode()) {
1796 case Instruction::Load:
1797 case Instruction::Store:
1798 case Instruction::PHI:
1799 case Instruction::Br:
1800
1801 return true;
1802 default:
1804 }
1805 };
1806
1807 bool HasSideEffects = false;
1808 for (auto *BB : TheLoop->blocks())
1809 for (auto &I : *BB) {
1810 if (I.mayWriteToMemory()) {
1812 HasSideEffects = true;
1813 continue;
1814 }
1815
1816
1818 "Complex writes to memory unsupported in early exit loops",
1819 "Cannot vectorize early exit loop with complex writes to memory",
1820 "WritesInEarlyExitLoop", ORE, TheLoop);
1821 return false;
1822 }
1823
1824 if (!IsSafeOperation(&I)) {
1826 "cannot be speculatively executed",
1827 "UnsafeOperationsEarlyExitLoop", ORE,
1828 TheLoop);
1829 return false;
1830 }
1831 }
1832
1833
1835 "Expected latch predecessor to be the early exiting block");
1836
1838
1839 if (!HasSideEffects) {
1840
1841 Predicates.clear();
1842 if ((TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1843 &Predicates)) {
1845 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1846 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1847 return false;
1848 }
1849 } else if (!canUncountableExitConditionLoadBeMoved(
1850 SingleUncountableExitingBlock))
1851 return false;
1852
1853
// Loads collected in NonDerefLoads are only accepted with unit stride.
1854 for (LoadInst *LI : NonDerefLoads) {
1855
1856 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1857 if (Stride != 1) {
1859 "Loop contains potentially faulting strided load",
1860 "Cannot vectorize early exit loop with "
1861 "strided fault-only-first load",
1862 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1863 return false;
1864 }
1865 PotentiallyFaultingLoads.insert(LI);
1866 LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
1867 << "\n");
1868 }
1869
1870 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1871 PSE.getSymbolicMaxBackedgeTakenCount();
1872
1873
1875 "Failed to get symbolic expression for backedge taken count");
1876 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1877 "backedge taken count: "
1878 << *SymbolicMaxBTC << '\n');
1879 UncountableExitingBB = SingleUncountableExitingBlock;
1880 UncountableExitWithSideEffects = HasSideEffects;
1881 return true;
1882}
1883
/// Checks whether the load feeding the uncountable exit condition satisfies
/// the requirements for being moved.
///
/// Visible requirements, each reported with its own remark on failure:
///  - the branch condition matches a supported pattern
///    ("NoConditionLoadForEarlyExitLoop");
///  - the other compare operand R is loop-invariant;
///  - the load address is an affine add recurrence in TheLoop
///    ("EarlyExitLoadInvariantAddress");
///  - a faulting check passes ("PotentiallyFaultingEarlyExitLoop");
///  - the load is guaranteed to execute ("ConditionalUncountableExitLoad");
///  - no other instruction that may write memory is left unresolved by the
///    alias query against the load pointer
///    ("CantVectorizeAliasWithCriticalUncountableExitLoad").
/// Returns true when all of them hold.
// NOTE(review): the embedded line numbers skip 1885, 1893, 1896, 1898,
// 1900-1902, 1910, 1918, 1920, 1928-1930, 1932, 1940, 1943-1944, 1959, 1961
// and 1965. The parameter list, the declarations of Br, Load, R, AR,
// Predicates and SI, and the exact conditions are not visible in this listing.
1884bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1886
1887
1888
1889
1890
1891
1892
1894
1895 using namespace llvm::PatternMatch;
1897 Value *Ptr = nullptr;
1899 if ((Br->getCondition(),
1903 "Early exit loop with store but no supported condition load",
1904 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1905 return false;
1906 }
1907
1908
1909 if (!TheLoop->isLoopInvariant(R)) {
1911 "Early exit loop with store but no supported condition load",
1912 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1913 return false;
1914 }
1915
1916
1917
1919 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1921 "Uncountable exit condition depends on load with an address that is "
1922 "not an add recurrence in the loop",
1923 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1924 return false;
1925 }
1926
1927
1931 &Predicates)) {
1933 "Loop may fault",
1934 "Cannot vectorize potentially faulting early exit loop",
1935 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1936 return false;
1937 }
1938
1939 ICFLoopSafetyInfo SafetyInfo;
1941
1942
1945 "Load for uncountable exit not guaranteed to execute",
1946 "ConditionalUncountableExitLoad", ORE, TheLoop);
1947 return false;
1948 }
1949
1950
1951
1952
1953 for (auto *BB : TheLoop->blocks()) {
1954 for (auto &I : *BB) {
// The condition load itself is not a conflicting access.
1955 if (&I == Load)
1956 continue;
1957
1958 if (I.mayWriteToMemory()) {
1960 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1962 continue;
1963 }
1964
1966 "Cannot determine whether critical uncountable exit load address "
1967 "does not alias with a memory write",
1968 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1969 return false;
1970 }
1971 }
1972 }
1973
1974 return true;
1975}
1976
1978
1979
1980 bool Result = true;
1981
1982 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1983
1984
1985 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1986 if (DoExtraAnalysis) {
1987 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1988 Result = false;
1989 } else {
1990 return false;
1991 }
1992 }
1993
1994
1995 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1996 << '\n');
1997
1998
1999
2000 if (!TheLoop->isInnermost()) {
2001 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
2002
2003 if (!canVectorizeOuterLoop()) {
2005 "UnsupportedOuterLoop", ORE, TheLoop);
2006
2007
2008 return false;
2009 }
2010
2011 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
2012 return Result;
2013 }
2014
2015 assert(TheLoop->isInnermost() && "Inner loop expected.");
2016
2017 unsigned NumBlocks = TheLoop->getNumBlocks();
2018 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
2019 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
2020 if (DoExtraAnalysis)
2021 Result = false;
2022 else
2023 return false;
2024 }
2025
2026
2027 if (!canVectorizeInstrs()) {
2028 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
2029 if (DoExtraAnalysis)
2030 Result = false;
2031 else
2032 return false;
2033 }
2034
2036 if (TheLoop->getExitingBlock()) {
2038 "UnsupportedUncountableLoop", ORE, TheLoop);
2039 if (DoExtraAnalysis)
2040 Result = false;
2041 else
2042 return false;
2043 } else {
2044 if (!isVectorizableEarlyExitLoop()) {
2047 "Must be false without vectorizable early-exit loop");
2048 if (DoExtraAnalysis)
2049 Result = false;
2050 else
2051 return false;
2052 }
2053 }
2054 }
2055
2056
2057 if (!canVectorizeMemory()) {
2058 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
2059 if (DoExtraAnalysis)
2060 Result = false;
2061 else
2062 return false;
2063 }
2064
2065
2066 if (UncountableExitWithSideEffects) {
2068 "Writes to memory unsupported in early exit loops",
2069 "Cannot vectorize early exit loop with writes to memory",
2070 "WritesInEarlyExitLoop", ORE, TheLoop);
2071 return false;
2072 }
2073
2074 if (Result) {
2075 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2076 << (LAI->getRuntimePointerChecking()->Need
2077 ? " (with a runtime bound check)"
2078 : "")
2079 << "!\n");
2080 }
2081
2085
2086 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2087 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2088 "due to SCEVThreshold");
2090 "Too many SCEV assumptions need to be made and checked at runtime",
2091 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2092 if (DoExtraAnalysis)
2093 Result = false;
2094 else
2095 return false;
2096 }
2097
2098
2099
2100
2101
2102 return Result;
2103}
2104
2106
2107
2108
2109
2110 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2113 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2114 return false;
2115 }
2116
2117 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2118
2120
2122 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
2123
2125 PHINode *OrigPhi = Entry.first;
2126 for (User *U : OrigPhi->users()) {
2128 if (!TheLoop->contains(UI)) {
2129 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
2130 "outside user for "
2131 << *UI << "\n");
2132 return false;
2133 }
2134 }
2135 }
2136
2137
2139
2140
2141
2143 for (BasicBlock *BB : TheLoop->blocks()) {
2144 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2145 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2146 return false;
2147 }
2148 }
2149
2150 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2151
2152 return true;
2153}
2154
2156
2158
2159
2160
2161 for (BasicBlock *BB : TheLoop->blocks()) {
2162 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
2163 assert(R && "Must be able to predicate block when tail-folding.");
2164 }
2165}
2166
2167}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
#define LV_NAME
Definition LoopVectorizationLegality.cpp:36
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition LoopVectorizationLegality.cpp:87
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
@ NoAlias
The two locations do not alias at all.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
Definition LoopVectorizationLegality.cpp:1381
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
Definition LoopVectorizationLegality.cpp:1388
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
Definition LoopVectorizationLegality.cpp:1977
bool blockNeedsPredication(const BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition LoopVectorizationLegality.cpp:1446
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
Definition LoopVectorizationLegality.cpp:474
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
Definition LoopVectorizationLegality.cpp:1355
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
Definition LoopVectorizationLegality.cpp:1441
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
Definition LoopVectorizationLegality.cpp:1422
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
Definition LoopVectorizationLegality.cpp:1411
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
Definition LoopVectorizationLegality.cpp:1401
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
Definition LoopVectorizationLegality.cpp:586
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
Definition LoopVectorizationLegality.cpp:491
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
Definition LoopVectorizationLegality.cpp:2105
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
Definition LoopVectorizationLegality.cpp:2155
bool hasUncountableEarlyExit() const
Returns true if the loop has exactly one uncountable early exit, i.e.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
Definition LoopVectorizationLegality.cpp:619
BasicBlock * getUncountableEarlyExitingBlock() const
Returns the uncountable early exiting block, if there is exactly one.
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
Definition LoopVectorizationLegality.cpp:1437
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Definition LoopVectorizationLegality.cpp:1431
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_Unspecified
Not selected.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
enum ForceKind getForce() const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
Definition LoopVectorizationLegality.cpp:191
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
Definition LoopVectorizationLegality.cpp:270
void emitRemarkWithHints() const
Dumps all the hint information.
Definition LoopVectorizationLegality.cpp:235
ElementCount getWidth() const
@ FK_Enabled
Forcing enabled.
@ FK_Undefined
Not selected.
@ FK_Disabled
Forcing disabled.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
Definition LoopVectorizationLegality.cpp:163
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Definition LoopVectorizationLegality.cpp:107
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Definition LoopVectorizationLegality.cpp:260
unsigned getInterleave() const
unsigned getIsVectorized() const
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
LLVM_ABI StringRef getString() const
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
iterator find(const KeyT &Key)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
const Loop * getLoop() const
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
Definition LoopVectorizationLegality.cpp:405
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
Definition LoopVectorizationLegality.cpp:360
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
static bool canWidenCallReturnType(Type *Ty)
Returns true if the call return type Ty can be widened by the loop vectorizer.
Definition LoopVectorizationLegality.cpp:805
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)
Definition LoopVectorizationLegality.cpp:431
static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)
Definition LoopVectorizationLegality.cpp:417
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition LoopVectorizationLegality.cpp:440
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
Definition LoopVectorizationLegality.cpp:458
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns true if the loop contains read-only memory accesses and doesn't throw.
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
Definition LoopVectorizationLegality.cpp:1127
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Definition LoopVectorizationLegality.cpp:783
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependence between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.