LLVM: lib/Transforms/Vectorize/LoopVectorizationLegality.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
30
31using namespace llvm;
32using namespace PatternMatch;
33
34#define LV_NAME "loop-vectorize"
35#define DEBUG_TYPE LV_NAME
36
39 cl::desc("Enable if-conversion during vectorization."));
40
43 cl::desc("Enable recognition of non-constant strided "
44 "pointer induction variables."));
45
46namespace llvm {
49 cl::desc("Allow enabling loop hints to reorder "
50 "FP operations during vectorization."));
51}
52
53
54
57 cl::desc("The maximum number of SCEV checks allowed."));
58
61 cl::desc("The maximum number of SCEV checks allowed with a "
62 "vectorize(enable) pragma"));
63
68 cl::desc("Control whether the compiler can use scalable vectors to "
69 "vectorize a loop"),
72 "Scalable vectorization is disabled."),
75 "Scalable vectorization is available and favored when the "
76 "cost is inconclusive."),
79 "Scalable vectorization is available and favored when the "
80 "cost is inconclusive.")));
81
84 cl::desc("Enables autovectorization of some loops containing histograms"));
85
86
88
89namespace llvm {
90
// Checks whether Val is an acceptable value for this hint, dispatching on the
// hint's Kind. Returns false for any Kind that is not listed in the switch.
// NOTE(review): the listing numbers embedded in this extract jump 93 -> 95 -> 97,
// so the statements that originally followed `case HK_WIDTH:` and
// `case HK_INTERLEAVE:` appear to have been dropped. As shown here those two
// cases fall through to the HK_FORCE check — confirm against the upstream file.
91bool LoopVectorizeHints::Hint::validate(unsigned Val) {
92 switch (Kind) {
93 case HK_WIDTH:
95 case HK_INTERLEAVE:
97 case HK_FORCE:
// Val is unsigned, so this accepts exactly 0 and 1.
98 return (Val <= 1);
99 case HK_ISVECTORIZED:
100 case HK_PREDICATE:
101 case HK_SCALABLE:
// These three kinds share one rule: only 0 or 1 is valid.
102 return (Val == 0 || Val == 1);
103 }
// Reached only when Kind matched none of the cases above.
104 return false;
105}
106
108 bool InterleaveOnlyWhenForced,
112 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
113 Force("vectorize.enable", FK_Undefined, HK_FORCE),
114 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
115 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
116 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
117 TheLoop(L), ORE(ORE) {
118
119 getHintsFromMetadata();
120
121
124
125
126
127
128
129
130
135
136 if (Width.Value)
137
138
139
141 }
142
143
144
148
149
152
153 if (IsVectorized.Value != 1)
154
155
156
157 IsVectorized.Value =
160 << "LV: Interleaving disabled by the pass manager\n");
161}
162
165
167 Context,
168 {MDString::get(Context, "llvm.loop.isvectorized"),
173 {Twine(Prefix(), "vectorize.").str(),
174 Twine(Prefix(), "interleave.").str()},
175 {IsVectorizedMD});
177
178
179 IsVectorized.Value = 1;
180}
181
183 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
185 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
187 return false;
188 }
189
191 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
193 return false;
194 }
195
197 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
198
199
200
201 ORE.emit([&]() {
203 "AllDisabled", L->getStartLoc(),
204 L->getHeader())
205 << "loop not vectorized: vectorization and interleaving are "
206 "explicitly disabled, or the loop has already been "
207 "vectorized";
208 });
209 return false;
210 }
211
212 return true;
213}
214
216 using namespace ore;
217
218 ORE.emit([&]() {
223 << "loop not vectorized: vectorization is explicitly disabled";
224
227 R << "loop not vectorized";
229 R << " (Force=" << NV("Force", true);
230 if (Width.Value != 0)
231 R << ", Vector Width=" << NV("VectorWidth", getWidth());
232 if (getInterleave() != 0)
233 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
234 R << ")";
235 }
236 return R;
237 });
238}
239
248}
249
251
252
256 EC.getKnownMinValue() > 1);
257}
258
// Walks the loop's metadata (LoopID) and hands every entry that carries exactly
// one argument to setHint(). Returns early when there is no LoopID.
// NOTE(review): this extract omits several listing lines (260, 265, 268-270,
// 289), including the declarations of LoopID, MDO, S, Args and Name and the
// header of the loop that the `continue` statements belong to — confirm the
// details against the upstream file.
259void LoopVectorizeHints::getHintsFromMetadata() {
261 if (!LoopID)
262 return;
263
264
// Sanity check: operand 0 of the loop id must be the loop id node itself.
266 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
267
271
272
273
// Entry is an MDNode: operand 0 is cast into S and the remaining operands are
// appended to Args. Nodes without operands are skipped.
274 if (const MDNode *MD = dyn_cast(MDO)) {
275 if (!MD || MD->getNumOperands() == 0)
276 continue;
277 S = dyn_cast(MD->getOperand(0));
278 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
279 Args.push_back(MD->getOperand(Idx));
280 } else {
// Otherwise MDO itself is cast into S; such an entry must not have arguments.
281 S = dyn_cast(MDO);
282 assert(Args.size() == 0 && "too many arguments for MDString");
283 }
284
// Skip entries for which the cast above produced a null S.
285 if (!S)
286 continue;
287
288
// Only entries with exactly one argument are forwarded to setHint().
290 if (Args.size() == 1)
291 setHint(Name, Args[0]);
292 }
293}
294
296 if (.starts_with(Prefix()))
297 return;
299
300 const ConstantInt *C = mdconst::dyn_extract(Arg);
301 if ()
302 return;
303 unsigned Val = C->getZExtValue();
304
305 Hint *Hints[] = {&Width, &Interleave, &Force,
306 &IsVectorized, &Predicate, &Scalable};
307 for (auto *H : Hints) {
309 if (H->validate(Val))
310 H->Value = Val;
311 else
312 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
313 break;
314 }
315 }
316}
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
343
344
345 if (Lp == OuterLp)
346 return true;
347 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
348
349
351 if () {
352 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
353 return false;
354 }
355
356
358 auto *LatchBr = dyn_cast(Latch->getTerminator());
359 if (!LatchBr || LatchBr->isUnconditional()) {
360 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
361 return false;
362 }
363
364
365 auto *LatchCmp = dyn_cast(LatchBr->getCondition());
366 if (!LatchCmp) {
368 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
369 return false;
370 }
371
372 Value *CondOp0 = LatchCmp->getOperand(0);
373 Value *CondOp1 = LatchCmp->getOperand(1);
374 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
375 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
376 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
377 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
378 return false;
379 }
380
381 return true;
382}
383
384
385
388 return false;
389
390
391 for (Loop *SubLp : *Lp)
393 return false;
394
395 return true;
396}
397
400 return DL.getIntPtrType(Ty);
401
402
403
406
407 return Ty;
408}
409
414 return Ty0;
415 return Ty1;
416}
417
418
419
422
423
424 if (!AllowedExit.count(Inst))
425
428
429 if (!TheLoop->contains(UI)) {
430 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
431 return true;
432 }
433 }
434 return false;
435}
436
437
440
442 return true;
443
444
445 Value *APtr = A->getPointerOperand();
446 Value *BPtr = B->getPointerOperand();
447 if (APtr == BPtr)
448 return true;
449
450
452}
453
456
457
458
459
460 const auto &Strides =
462
465 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
466 CanAddPredicate, false).value_or(0);
467 if (Stride == 1 || Stride == -1)
468 return Stride;
469 return 0;
470}
471
474}
475
476namespace {
477
478
479
480
481
482
483class SCEVAddRecForUniformityRewriter
485
486 unsigned StepMultiplier;
487
488
489 unsigned Offset;
490
491
492 Loop *TheLoop;
493
494
495 bool CannotAnalyze = false;
496
497 bool canAnalyze() const { return !CannotAnalyze; }
498
499public:
500 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
501 unsigned Offset, Loop *TheLoop)
502 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
503 TheLoop(TheLoop) {}
504
507 "addrec outside of TheLoop must be invariant and should have been "
508 "handled earlier");
509
510
513 if (!SE.isLoopInvariant(Step, TheLoop)) {
514 CannotAnalyze = true;
515 return Expr;
516 }
517 const SCEV *NewStep =
518 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
519 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
520 const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
521 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
522 }
523
525 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
526 return S;
528 }
529
531 if (SE.isLoopInvariant(S, TheLoop))
532 return S;
533
534 CannotAnalyze = true;
535 return S;
536 }
537
539
540 CannotAnalyze = true;
541 return S;
542 }
543
545 unsigned StepMultiplier, unsigned Offset,
546 Loop *TheLoop) {
547
548
549
550
552 [](const SCEV *S) { return isa(S); }))
554
555 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
556 TheLoop);
558
562 }
563};
564
565}
566
569 return true;
571 return false;
573 return true;
574
575
576
577 auto *SE = PSE.getSE();
579 return false;
581
582
583
585 const SCEV *FirstLaneExpr =
586 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
587 if (isa(FirstLaneExpr))
588 return false;
589
590
591
592
593 return all_of(reverse(seq(1, FixedVF)), [&](unsigned I) {
594 const SCEV *IthLaneExpr =
595 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
596 return FirstLaneExpr == IthLaneExpr;
597 });
598}
599
603 if ()
604 return false;
605
606
607
608
610}
611
// Legality checks for the outer-loop path. Asserts that TheLoop is not an
// innermost loop, then runs a series of checks. Each visible failure branch
// either records the failure in Result (when DoExtraAnalysis is set) or
// returns false immediately.
// NOTE(review): many listing lines are missing from this extract (e.g. 617, 619,
// 624, 640-643, 655, 657, 661, 668, 671, 676). The declaration of
// DoExtraAnalysis, the loop over blocks, the reporting calls that receive the
// message strings, and the final return are not visible — confirm upstream.
612bool LoopVectorizationLegality::canVectorizeOuterLoop() {
613 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
614
615
// Stays true unless one of the checks below fails.
616 bool Result = true;
618
620
621
// A block whose terminator does not cast to the expected branch type is
// reported as "CFGNotUnderstood".
622 auto *Br = dyn_cast(BB->getTerminator());
623 if (!Br) {
625 "loop control flow is not understood by vectorizer",
626 "CFGNotUnderstood", ORE, TheLoop);
627 if (DoExtraAnalysis)
628 Result = false;
629 else
630 return false;
631 }
632
633
634
635
636
637
638
// Additional restriction on conditional branches; the rest of this condition
// (listing lines 640-643) is not visible here.
639 if (Br && Br->isConditional() &&
644 "loop control flow is not understood by vectorizer",
645 "CFGNotUnderstood", ORE, TheLoop);
646 if (DoExtraAnalysis)
647 Result = false;
648 else
649 return false;
650 }
651 }
652
653
654
// A further loop-level check involving TheLoop; its opening line is not visible.
656 TheLoop )) {
658 "loop control flow is not understood by vectorizer",
659 "CFGNotUnderstood", ORE, TheLoop);
660 if (DoExtraAnalysis)
662 else
663 return false;
664 }
665
666
// Induction setup must succeed; failure is reported as "UnsupportedPhi".
667 if (!setupOuterLoopInductions()) {
669 "UnsupportedPhi", ORE, TheLoop);
670 if (DoExtraAnalysis)
672 else
673 return false;
674 }
675
677}
678
679void LoopVectorizationLegality::addInductionPhi(
683
684
685
686
687
689 if (!Casts.empty())
690 InductionCastsToIgnore.insert(*Casts.begin());
691
692 Type *PhiTy = Phi->getType();
694
695
697 if (!WidestIndTy)
699 else
701 }
702
703
705 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
706 isa(ID.getStartValue()) &&
707 cast(ID.getStartValue())->isNullValue()) {
708
709
710
711
712
713 if (!PrimaryInduction || PhiTy == WidestIndTy)
714 PrimaryInduction = Phi;
715 }
716
717
718
719
720
721
722
724 AllowedExit.insert(Phi);
726 }
727
728 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
729}
730
// Returns true only if every PHI yielded by Header->phis() is accepted by the
// IsSupportedPhi predicate below. Accepted PHIs are registered through
// addInductionPhi() as a side effect.
// NOTE(review): listing lines 732, 736-738 and 744 are missing from this
// extract, so the definitions of Header and ID and the condition guarding the
// addInductionPhi() call are not visible — confirm against the upstream file.
731bool LoopVectorizationLegality::setupOuterLoopInductions() {
733
734
// Predicate applied to each header PHI: register it and return true on the
// (not visible) success condition, otherwise log and return false.
735 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
739 addInductionPhi(&Phi, ID, AllowedExit);
740 return true;
741 }
742
743
745 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
746 return false;
747 };
748
// all_of stops at the first PHI for which the predicate returns false.
749 return llvm::all_of(Header->phis(), IsSupportedPhi);
750}
751
752
753
754
755
756
757
758
759
760
764
765
766 if (Scalarize) {
767 ElementCount WidestFixedVF, WidestScalableVF;
768 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
775 assert((WidestScalableVF.isZero() || !Scalarize) &&
776 "Caller may decide to scalarize a variant using a scalable VF");
777 }
778 return Scalarize;
779}
780
781
782
784 auto *StructTy = dyn_cast(Ty);
785
786
787
788 if (StructTy && !StructTy->containsHomogeneousTypes())
789 return false;
791}
792
793bool LoopVectorizationLegality::canVectorizeInstrs() {
795
796
798
800 if (auto *Phi = dyn_cast(&I)) {
801 Type *PhiTy = Phi->getType();
802
806 "loop control flow is not understood by vectorizer",
807 "CFGNotUnderstood", ORE, TheLoop);
808 return false;
809 }
810
811
812
813
814 if (BB != Header) {
815
816
817
818
819
821 continue;
822 }
823
824
825 if (Phi->getNumIncomingValues() != 2) {
827 "loop control flow is not understood by vectorizer",
828 "CFGNotUnderstood", ORE, TheLoop, Phi);
829 return false;
830 }
831
834 DT, PSE.getSE())) {
837 Reductions[Phi] = RedDes;
838 continue;
839 }
840
841
842
843
844
845 auto IsDisallowedStridedPointerInduction =
848 return false;
850 ID.getConstIntStepValue() == nullptr;
851 };
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
869 !IsDisallowedStridedPointerInduction(ID)) {
870 addInductionPhi(Phi, ID, AllowedExit);
872 continue;
873 }
874
876 AllowedExit.insert(Phi);
877 FixedOrderRecurrences.insert(Phi);
878 continue;
879 }
880
881
882
884 !IsDisallowedStridedPointerInduction(ID)) {
885 addInductionPhi(Phi, ID, AllowedExit);
886 continue;
887 }
888
890 "value that could not be identified as "
891 "reduction is used outside the loop",
892 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
893 return false;
894 }
895
896
897
898
899
900 auto *CI = dyn_cast(&I);
901
903 !isa(CI) &&
904 !(CI->getCalledFunction() && TLI &&
907
908
910 bool IsMathLibCall =
911 TLI && CI->getCalledFunction() &&
912 CI->getType()->isFloatingPointTy() &&
913 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
915
916 if (IsMathLibCall) {
917
918
919
920
922 "Found a non-intrinsic callsite",
923 "library call cannot be vectorized. "
924 "Try compiling with -fno-math-errno, -ffast-math, "
925 "or similar flags",
926 "CantVectorizeLibcall", ORE, TheLoop, CI);
927 } else {
929 "call instruction cannot be vectorized",
930 "CantVectorizeLibcall", ORE, TheLoop, CI);
931 }
932 return false;
933 }
934
935
936
937 if (CI) {
938 auto *SE = PSE.getSE();
940 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
943 TheLoop)) {
945 "intrinsic instruction cannot be vectorized",
946 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
947 return false;
948 }
949 }
950 }
951
952
953
955 VecCallVariantsFound = true;
956
957 auto CanWidenInstructionTy = [this](Instruction const &Inst) {
958 Type *InstTy = Inst.getType();
959 if (!isa(InstTy))
961
962
963
964
966 all_of(Inst.users(), IsaPred)) {
967
968
969 StructVecCallFound = true;
970 return true;
971 }
972
973 return false;
974 };
975
976
977
978
979 if (!CanWidenInstructionTy(I) ||
980 (isa(I) &&
982 isa(I)) {
984 "instruction return type cannot be vectorized",
985 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
986 return false;
987 }
988
989
990 if (auto *ST = dyn_cast(&I)) {
991 Type *T = ST->getValueOperand()->getType();
994 "CantVectorizeStore", ORE, TheLoop, ST);
995 return false;
996 }
997
998
999
1000 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1001
1003 assert(VecTy && "did not find vectorized version of stored type");
1006 "nontemporal store instruction cannot be vectorized",
1007 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1008 return false;
1009 }
1010 }
1011
1012 } else if (auto *LD = dyn_cast(&I)) {
1013 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1014
1015
1017 assert(VecTy && "did not find vectorized version of load type");
1020 "nontemporal load instruction cannot be vectorized",
1021 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1022 return false;
1023 }
1024 }
1025
1026
1027
1028
1029
1030
1031 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1032 .isFast()) {
1033 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1034 Hints->setPotentiallyUnsafe();
1035 }
1036
1037
1038
1040
1041
1042
1043
1046 continue;
1047 }
1049 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1050 return false;
1051 }
1052 }
1053 }
1054
1055 if (!PrimaryInduction) {
1056 if (Inductions.empty()) {
1058 "loop induction variable could not be identified",
1059 "NoInductionVariable", ORE, TheLoop);
1060 return false;
1061 }
1062 if (!WidestIndTy) {
1064 "integer loop induction variable could not be identified",
1065 "NoIntegerInductionVariable", ORE, TheLoop);
1066 return false;
1067 }
1068 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
1069 }
1070
1071
1072
1073
1074 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
1075 PrimaryInduction = nullptr;
1076
1077 return true;
1078}
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1096
1097
1101 return false;
1102
1103
1104
1105
1106
1107 Value *HIncVal = nullptr;
1110 return false;
1111
1112
1114 return false;
1115
1116
1118 if ()
1119 return false;
1120
1121
1122 Value *HIdx = nullptr;
1123 for (Value *Index : GEP->indices()) {
1124 if (HIdx)
1125 return false;
1126 if (!isa(Index))
1127 HIdx = Index;
1128 }
1129
1130 if (!HIdx)
1131 return false;
1132
1133
1134
1135
1136
1137
1138
1139
1142 return false;
1143
1144
1145 const auto *AR = dyn_cast(PSE.getSE()->getSCEV(VPtrVal));
1146 if (!AR || AR->getLoop() != TheLoop)
1147 return false;
1148
1149
1150
1154 return false;
1155
1156 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1157
1158
1159 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1160 return true;
1161}
1162
1163bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1164
1165
1167 return false;
1168
1169
1173
1174
1175 if (!Deps)
1176 return false;
1177
1179
1180
1183 continue;
1184
1185
1186
1187
1189 return false;
1190
1191 IUDep = &Dep;
1192 }
1193 if (!IUDep)
1194 return false;
1195
1196
1197 LoadInst *LI = dyn_cast(IUDep->getSource(DepChecker));
1199
1200 if (!LI || !SI)
1201 return false;
1202
1203 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1205}
1206
1207bool LoopVectorizationLegality::canVectorizeMemory() {
1208 LAI = &LAIs.getInfo(*TheLoop);
1210 if (LAR) {
1211 ORE->emit([&]() {
1213 "loop not vectorized: ", *LAR);
1214 });
1215 }
1216
1218 return canVectorizeIndirectUnsafeDependences();
1219
1222 "write to a loop invariant address could not "
1223 "be vectorized",
1224 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1225 TheLoop);
1226 return false;
1227 }
1228
1229
1230
1231
1232
1234
1235
1238 continue;
1239
1242 "We don't allow storing to uniform addresses",
1243 "write of conditional recurring variant value to a loop "
1244 "invariant address could not be vectorized",
1245 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1246 return false;
1247 }
1248
1249
1250
1251
1252 if (Instruction *Ptr = dyn_cast(SI->getPointerOperand())) {
1255 "Invariant address is calculated inside the loop",
1256 "write to a loop invariant address could not "
1257 "be vectorized",
1258 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1259 return false;
1260 }
1261 }
1262 }
1263
1265
1266
1267
1268
1269
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1286 I->getValueOperand()->getType() ==
1287 SI->getValueOperand()->getType();
1288 });
1289 continue;
1290 }
1292 }
1293
1294 bool IsOK = UnhandledStores.empty();
1295
1296 if (!IsOK) {
1298 "We don't allow storing to uniform addresses",
1299 "write to a loop invariant address could not "
1300 "be vectorized",
1301 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1302 return false;
1303 }
1304 }
1305 }
1306
1308 return true;
1309}
1310
1312 bool EnableStrictReductions) {
1313
1314
1315 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1316 return true;
1317
1318
1319
1320
1321 if (!EnableStrictReductions ||
1325 }))
1326 return false;
1327
1328
1329
1330
1334 }));
1335}
1336
1341 });
1342}
1343
1348 return false;
1349
1352 return V == InvariantAddress ||
1354 });
1355}
1356
1358 Value *In0 = const_cast<Value *>(V);
1359 PHINode *PN = dyn_cast_or_null(In0);
1360 if (!PN)
1361 return false;
1362
1363 return Inductions.count(PN);
1364}
1365
1369 return nullptr;
1373 return &ID;
1374 return nullptr;
1375}
1376
1380 return nullptr;
1383 return &ID;
1384 return nullptr;
1385}
1386
1388 const Value *V) const {
1389 auto *Inst = dyn_cast(V);
1390 return (Inst && InductionCastsToIgnore.count(Inst));
1391}
1392
1395}
1396
1398 const PHINode *Phi) const {
1399 return FixedOrderRecurrences.count(Phi);
1400}
1401
1403
1404
1405
1410 "Uncountable exiting block must be a direct predecessor of latch");
1411 return BB == Latch;
1412 }
1414}
1415
// Decides whether the instructions visited here can be predicated. Call, load
// and store instructions are recorded in MaskedOp; any other instruction that
// may read memory, write memory or throw makes the function return false.
// Returns true when no such instruction is found.
// NOTE(review): the parameter list (listing lines 1417-1419), the header of
// the loop over instructions, and lines 1423, 1438 and 1445 are missing from
// this extract — confirm against the upstream file.
1416bool LoopVectorizationLegality::blockCanBePredicated(
1420
1421
// Instructions matching the assume-intrinsic pattern do not block predication.
// The statement at listing line 1423 is not visible.
1422 if (match(&I, m_IntrinsicIntrinsic::assume())) {
1424 continue;
1425 }
1426
1427
1428
1429
// Another instruction class is skipped outright; the template argument of
// isa was lost in extraction.
1430 if (isa(&I))
1431 continue;
1432
1433
1434
1435
1436
// Calls are recorded in MaskedOp; part of the condition (line 1438) is missing.
1437 if (CallInst *CI = dyn_cast(&I))
1439 MaskedOp.insert(CI);
1440 continue;
1441 }
1442
1443
// Loads are recorded in MaskedOp; the statement at line 1445 is not visible.
1444 if (auto *LI = dyn_cast(&I)) {
1446 MaskedOp.insert(LI);
1447 continue;
1448 }
1449
1450
1451
1452
1453
1454
// Stores are always recorded in MaskedOp.
1455 if (auto *SI = dyn_cast(&I)) {
1456 MaskedOp.insert(SI);
1457 continue;
1458 }
1459
// Anything else that touches memory or may throw cannot be predicated.
1460 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1461 return false;
1462 }
1463
1464 return true;
1465}
1466
1467bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1470 "IfConversionDisabled", ORE, TheLoop);
1471 return false;
1472 }
1473
1474 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1475
1476
1477
1478
1479
1480
1482
1483
1489 continue;
1490 }
1491
1492
1493
1494
1495
1496
1500 LoadInst *LI = dyn_cast(&I);
1501
1502
1503
1506 &Predicates))
1508 Predicates.clear();
1509 }
1510 }
1511
1512
1514
1515
1516 if (isa(BB->getTerminator())) {
1519 "LoopContainsUnsupportedSwitch", ORE,
1520 TheLoop, BB->getTerminator());
1521 return false;
1522 }
1523 } else if (!isa(BB->getTerminator())) {
1525 "LoopContainsUnsupportedTerminator", ORE,
1526 TheLoop, BB->getTerminator());
1527 return false;
1528 }
1529
1530
1532 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1534 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1535 ORE, TheLoop, BB->getTerminator());
1536 return false;
1537 }
1538 }
1539
1540
1541 return true;
1542}
1543
1544
// Checks the control-flow structure of the single loop Lp. Both visible
// failure branches report "CFGNotUnderstood" and then either continue (when
// DoExtraAnalysis is set) or return false.
// NOTE(review): the conditions being tested, the declaration of
// DoExtraAnalysis, the opening of the assertion and of the reporting calls,
// the statements at listing lines 1567 and 1578, and the final return
// (line 1583) are all missing from this extract — confirm upstream.
1545bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1546 bool UseVPlanNativePath) {
// Tail of an assertion whose condition (listing line 1547) is not visible.
1548 "VPlan-native path is not enabled.");
1549
1550
1551
1552
1553
1554
1555
1556
1559
1560
1561
// First CFG check (condition not visible).
1564 "loop control flow is not understood by vectorizer",
1565 "CFGNotUnderstood", ORE, TheLoop);
1566 if (DoExtraAnalysis)
1568 else
1569 return false;
1570 }
1571
1572
// Second CFG check (condition not visible).
1575 "loop control flow is not understood by vectorizer",
1576 "CFGNotUnderstood", ORE, TheLoop);
1577 if (DoExtraAnalysis)
1579 else
1580 return false;
1581 }
1582
1584}
1585
// Applies canVectorizeLoopCFG() to Lp and then recurses into every sub-loop of
// Lp. Without DoExtraAnalysis the first failure returns false immediately.
// NOTE(review): listing lines 1590-1591, 1594, 1604 and 1609 are missing from
// this extract, so the declaration of DoExtraAnalysis, the statements executed
// when it is set, and the final return are not visible — confirm upstream.
1586bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1587 Loop *Lp, bool UseVPlanNativePath) {
1588
1589
// Check this loop's own CFG first.
1592 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1593 if (DoExtraAnalysis)
1595 else
1596 return false;
1597 }
1598
1599
1600
// Then check each directly nested loop recursively.
1601 for (Loop *SubLp : *Lp)
1602 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1603 if (DoExtraAnalysis)
1605 else
1606 return false;
1607 }
1608
1610}
1611
1612bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1614 if (!LatchBB) {
1616 "Cannot vectorize early exit loop",
1617 "NoLatchEarlyExit", ORE, TheLoop);
1618 return false;
1619 }
1620
1621 if (Reductions.size() || FixedOrderRecurrences.size()) {
1623 "Found reductions or recurrences in early-exit loop",
1624 "Cannot vectorize early exit loop with reductions or recurrences",
1625 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1626 return false;
1627 }
1628
1631
1632
1634 for (BasicBlock *BB : ExitingBlocks) {
1637 if (isa(EC)) {
1638 UncountableExitingBlocks.push_back(BB);
1639
1641 if (Succs.size() != 2) {
1643 "Early exiting block does not have exactly two successors",
1644 "Incorrect number of successors from early exiting block",
1645 "EarlyExitTooManySuccessors", ORE, TheLoop);
1646 return false;
1647 }
1648
1650 if (!TheLoop->contains(Succs[0]))
1651 ExitBlock = Succs[0];
1652 else {
1654 ExitBlock = Succs[1];
1655 }
1656 UncountableExitBlocks.push_back(ExitBlock);
1657 } else
1658 CountableExitingBlocks.push_back(BB);
1659 }
1660
1661
1662
1663
1664 Predicates.clear();
1665
1666
1669 "Loop has too many uncountable exits",
1670 "Cannot vectorize early exit loop with more than one early exit",
1671 "TooManyUncountableEarlyExits", ORE, TheLoop);
1672 return false;
1673 }
1674
1675
1676
1680 "Cannot vectorize early exit loop",
1681 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1682 return false;
1683 }
1684
1685
1686 if (isa(
1689 "Cannot determine exact exit count for latch block",
1690 "Cannot vectorize early exit loop",
1691 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1692 return false;
1693 }
1695 "Latch block not found in list of countable exits!");
1696
1697
1698
1699 auto IsSafeOperation = [](Instruction *I) -> bool {
1700 switch (I->getOpcode()) {
1701 case Instruction::Load:
1702 case Instruction::Store:
1703 case Instruction::PHI:
1704 case Instruction::Br:
1705
1706 return true;
1707 default:
1709 }
1710 };
1711
1712 for (auto *BB : TheLoop->blocks())
1713 for (auto &I : *BB) {
1714 if (I.mayWriteToMemory()) {
1715
1717 "Writes to memory unsupported in early exit loops",
1718 "Cannot vectorize early exit loop with writes to memory",
1719 "WritesInEarlyExitLoop", ORE, TheLoop);
1720 return false;
1721 } else if (!IsSafeOperation(&I)) {
1723 "cannot be speculatively executed",
1724 "UnsafeOperationsEarlyExitLoop", ORE,
1725 TheLoop);
1726 return false;
1727 }
1728 }
1729
1730
1732 "Expected latch predecessor to be the early exiting block");
1733
1734
1735 Predicates.clear();
1737 &Predicates)) {
1739 "Loop may fault",
1740 "Cannot vectorize potentially faulting early exit loop",
1741 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1742 return false;
1743 }
1744
1745 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1747
1748
1749 assert(!isa(SymbolicMaxBTC) &&
1750 "Failed to get symbolic expression for backedge taken count");
1751 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1752 "backedge taken count: "
1753 << *SymbolicMaxBTC << '\n');
1754 return true;
1755}
1756
1758
1759
1760 bool Result = true;
1761
1763
1764
1765 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1766 if (DoExtraAnalysis) {
1767 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1768 Result = false;
1769 } else {
1770 return false;
1771 }
1772 }
1773
1774
1776 << '\n');
1777
1778
1779
1781 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1782
1783 if (!canVectorizeOuterLoop()) {
1785 "UnsupportedOuterLoop", ORE, TheLoop);
1786
1787
1788 return false;
1789 }
1790
1791 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1792 return Result;
1793 }
1794
1796
1797 unsigned NumBlocks = TheLoop->getNumBlocks();
1798 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1799 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1800 if (DoExtraAnalysis)
1801 Result = false;
1802 else
1803 return false;
1804 }
1805
1806
1807 if (!canVectorizeInstrs()) {
1808 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1809 if (DoExtraAnalysis)
1810 Result = false;
1811 else
1812 return false;
1813 }
1814
1815 HasUncountableEarlyExit = false;
1819 "UnsupportedUncountableLoop", ORE, TheLoop);
1820 if (DoExtraAnalysis)
1821 Result = false;
1822 else
1823 return false;
1824 } else {
1825 HasUncountableEarlyExit = true;
1826 if (!isVectorizableEarlyExitLoop()) {
1827 UncountableExitingBlocks.clear();
1828 HasUncountableEarlyExit = false;
1829 if (DoExtraAnalysis)
1830 Result = false;
1831 else
1832 return false;
1833 }
1834 }
1835 }
1836
1837
1838 if (!canVectorizeMemory()) {
1839 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1840 if (DoExtraAnalysis)
1841 Result = false;
1842 else
1843 return false;
1844 }
1845
1846 if (Result) {
1847 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1849 ? " (with a runtime bound check)"
1850 : "")
1851 << "!\n");
1852 }
1853
1857
1859 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
1860 "due to SCEVThreshold");
1862 "Too many SCEV assumptions need to be made and checked at runtime",
1863 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1864 if (DoExtraAnalysis)
1865 Result = false;
1866 else
1867 return false;
1868 }
1869
1870
1871
1872
1873
1874 return Result;
1875}
1876
1878
1879 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1880
1882
1884 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1885
1886
1887 for (auto *AE : AllowedExit) {
1888
1889
1890 if (ReductionLiveOuts.count(AE))
1891 continue;
1892 for (User *U : AE->users()) {
1895 continue;
1898 << "LV: Cannot fold tail by masking, loop has an outside user for "
1899 << *UI << "\n");
1900 return false;
1901 }
1902 }
1903
1905 PHINode *OrigPhi = Entry.first;
1906 for (User *U : OrigPhi->users()) {
1907 auto *UI = cast(U);
1908 if (!TheLoop->contains(UI)) {
1909 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
1910 "outside user for "
1911 << *UI << "\n");
1912 return false;
1913 }
1914 }
1915 }
1916
1917
1919
1920
1921
1924 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1925 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
1926 return false;
1927 }
1928 }
1929
1930 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1931
1932 return true;
1933}
1934
1936
1938
1939
1940
1942 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1943 assert(R && "Must be able to predicate block when tail-folding.");
1944 }
1945}
1946
1947}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
loop Loop Strength Reduction
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM Basic Block Representation.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
const LoopAccessInfo & getInfo(Loop &L)
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true if we can analyze the memory accesses in the loop and there are no memory dependence cycles.
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving a load and a store to an invariant address,...
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving two stores to an invariant address,...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
bool isLoopHeader(const BlockT *BB) const
const SmallVector< BasicBlock *, 4 > & getUncountableExitingBlocks() const
Returns all the exiting blocks with an uncountable exit.
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has an uncountable early exit, i.e. an uncountable exit that isn't the latch block.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
BasicBlock * getUncountableEarlyExitingBlock() const
Returns the uncountable early exiting block.
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Instruction * getExactFPInst()
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_Unspecified
Not selected.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
enum ForceKind getForce() const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
ElementCount getWidth() const
@ FK_Enabled
Forcing enabled.
@ FK_Undefined
Not selected.
@ FK_Disabled
Forcing disabled.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
unsigned getInterleave() const
unsigned getIsVectorized() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
static MDString * get(LLVMContext &Context, StringRef Str)
size_type count(const KeyT &Key) const
iterator find(const KeyT &Key)
Checks memory dependences among accesses to the same underlying object to determine whether vectorization is legal or not (and at which vectorization factor).
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
bool Need
This flag indicates if we need to add the runtime check.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
const Loop * getLoop() const
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getPredicatedExitCount(const Loop *L, const BasicBlock *ExitingBlock, SmallVectorImpl< const SCEVPredicate * > *Predicates, ExitCountKind Kind=Exact)
Same as above except this uses the predicated backedge taken info and may require predicates.
const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static constexpr size_t npos
Provides information about what library functions are available for the current target.
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
bool enableScalableVectorization() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
const ParentTy * getParent() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
static bool canWidenCallReturnType(Type *Ty)
Returns true if the call return type Ty can be widened by the loop vectorizer.
bool isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if the loop L cannot fault on any iteration and only contains read-only memory accesses.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependence between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
An object of this class is returned by queries that could not be answered.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.