LLVM: lib/Transforms/Vectorize/VPlan.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
46#include
47#include
48
49using namespace llvm;
51
52namespace llvm {
54}
56
58 "vplan-print-in-dot-format", cl::Hidden,
59 cl::desc("Use dot format instead of plain text when dumping VPlans"));
60
61#define DEBUG_TYPE "loop-vectorize"
62
63#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
65 const VPInstruction *Instr = dyn_cast(&V);
67 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
69 return OS;
70}
71#endif
72
75 switch (LaneKind) {
77
81 return Builder.getInt32(Lane);
82 }
84}
85
87 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
89 Def->addDefinedValue(this);
90}
91
93 assert(Users.empty() && "trying to delete a VPValue with remaining users");
95 Def->removeDefinedValue(this);
96}
97
98#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
100 if (const VPRecipeBase *R = dyn_cast_or_null(Def))
102 else
104}
105
107 const VPRecipeBase *Instr = dyn_cast_or_null(this->Def);
109 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
111 dbgs() << "\n";
112}
113
115 const VPRecipeBase *Instr = dyn_cast_or_null(this);
117 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
119 dbgs() << "\n";
120}
121#endif
122
124 return cast_or_null(Def);
125}
126
128 return cast_or_null(Def);
129}
130
131
132
134 T *Next = Start;
135 T *Current = Start;
136 while ((Next = Next->getParent()))
137 Current = Next;
138
140 WorkList.insert(Current);
141
142 for (unsigned i = 0; i < WorkList.size(); i++) {
143 T *Current = WorkList[i];
144 if (Current->getNumPredecessors() == 0)
145 return Current;
146 auto &Predecessors = Current->getPredecessors();
147 WorkList.insert(Predecessors.begin(), Predecessors.end());
148 }
149
150 llvm_unreachable("VPlan without any entry node without predecessors");
151}
152
154
156
157
162 return cast(Block);
163}
164
169 return cast(Block);
170}
171
173 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
174 Plan = ParentPlan;
175}
176
177
182 return cast(Block);
183}
184
189 return cast(Block);
190}
191
193 if (!Successors.empty() || !Parent)
194 return this;
196 "Block w/o successors not the exiting block of its parent.");
198}
199
201 if (!Predecessors.empty() || !Parent)
202 return this;
204 "Block w/o predecessors not the entry of its parent.");
206}
207
210 while (It != end() && It->isPhi())
211 It++;
212 return It;
213}
214
219 Loop *CurrentParentLoop, Type *CanonicalIVTy)
220 : TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
221 CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
222 TypeAnalysis(CanonicalIVTy) {}
223
225 if (Def->isLiveIn())
226 return Def->getLiveInIRValue();
227
230
234 }
235
238 if (!VecPart->getType()->isVectorTy()) {
239 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
240 return VecPart;
241 }
242
245
246 return Extract;
247}
248
250 if (NeedsScalar) {
255 "Trying to access a single scalar per part but has multiple scalars "
256 "per part.");
258 }
259
260
263
264 auto GetBroadcastInstrs = [this, Def](Value *V) {
265 bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
267 return V;
268
270 if (SafeToHoist) {
273 if (LoopVectorPreHeader)
275 }
276
277
278
280
281 return Shuf;
282 };
283
285 assert(Def->isLiveIn() && "expected a live-in");
286 Value *IRV = Def->getLiveInIRValue();
287 Value *B = GetBroadcastInstrs(IRV);
289 return B;
290 }
291
293
294
296 set(Def, ScalarValue);
297 return ScalarValue;
298 }
299
301
303
305
306
309 "unexpected recipe found to be invariant");
310 IsUniform = true;
311 LastLane = 0;
312 }
313
314 auto *LastInst = cast(get(Def, LastLane));
315
316
317
319 auto NewIP =
320 isa(LastInst)
324
325
326
327
328
329
330
331 Value *VectorValue = nullptr;
332 if (IsUniform) {
333 VectorValue = GetBroadcastInstrs(ScalarValue);
334 set(Def, VectorValue);
335 } else {
336
339 set(Def, Undef);
342 VectorValue = get(Def);
343 }
345 return VectorValue;
346}
347
349 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
351}
352
355
356
357 if (LVer && isa<LoadInst, StoreInst>(Orig))
359}
360
362
364 return;
365
366 if (Instruction *ToI = dyn_cast(To)) {
369 }
370}
371
374
375
376 if (DIL &&
381
383 auto NewDIL =
385 if (NewDIL)
387 else
388 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
389 << DIL->getFilename() << " Line: " << DIL->getLine());
390 } else
392}
393
397 Value *VectorValue = get(Def);
400 set(Def, VectorValue);
401}
402
405
406
411
412 return NewBB;
413}
414
417
418 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
422
423 assert(PredBB && "Predecessor basic-block not found building successor.");
424 auto *PredBBTerminator = PredBB->getTerminator();
426
427 auto *TermBr = dyn_cast(PredBBTerminator);
428 if (isa(PredBBTerminator)) {
429 assert(PredVPSuccessors.size() == 1 &&
430 "Predecessor ending w/o branch must have single successor.");
431 DebugLoc DL = PredBBTerminator->getDebugLoc();
434 Br->setDebugLoc(DL);
435 } else if (TermBr && !TermBr->isConditional()) {
436 TermBr->setSuccessor(0, NewBB);
437 } else {
438
439
440 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
441 assert((TermBr && (!TermBr->getSuccessor(idx) ||
442 (isa(this) &&
443 TermBr->getSuccessor(idx) == NewBB))) &&
444 "Trying to reset an existing successor block.");
445 TermBr->setSuccessor(idx, NewBB);
446 }
448 }
449}
450
452 assert(getHierarchicalSuccessors().size() <= 2 &&
453 "VPIRBasicBlock can have at most two successors at the moment!");
457 executeRecipes(State, IRBB);
458
459
460 if (getSingleSuccessor() && isa(IRBB->getTerminator())) {
463 IRBB->getTerminator()->eraseFromParent();
464 } else {
466 (getNumSuccessors() == 0 || isa(IRBB->getTerminator())) &&
467 "other blocks must be terminated by a branch");
468 }
469
470 connectToPredecessors(State->CFG);
471}
472
474 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
477 return NewBlock;
478}
479
481 bool Replica = bool(State->Lane);
483
484 auto IsReplicateRegion = [](VPBlockBase *BB) {
485 auto *R = dyn_cast_or_null(BB);
486 return R && R->isReplicator();
487 };
488
489
490 if ((Replica && this == getParent()->getEntry()) ||
491 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
492
493
494
496 } else {
497 NewBB = createEmptyBasicBlock(State->CFG);
498
500
502
503
507
510 connectToPredecessors(State->CFG);
511 }
512
513
514 executeRecipes(State, NewBB);
515}
516
518 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
521 return NewBlock;
522}
523
526 << " in BB:" << BB->getName() << '\n');
527
529
532
534}
535
537 assert((SplitAt == end() || SplitAt->getParent() == this) &&
538 "can only split at a position in the same block");
539
541
542 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
544
545
549
551}
552
553
554
556 if (P && P->isReplicator()) {
558
559
560 assert(( || !cast(P)->isReplicator()) &&
561 "unexpected nested replicate regions");
562 }
563 return P;
564}
565
568}
569
572}
573
575 if (VPBB->empty()) {
578 "block with multiple successors doesn't have a recipe as terminator");
579 return false;
580 }
581
583 bool IsCondBranch = isa(R) ||
587
590 assert(IsCondBranch && "block with multiple successors not terminated by "
591 "conditional branch recipe");
592
593 return true;
594 }
595
598 "block with 0 or 1 successors terminated by conditional branch recipe");
599 return false;
600}
601
604 return &back();
605 return nullptr;
606}
607
610 return &back();
611 return nullptr;
612}
613
616}
617
618#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
620 if (getSuccessors().empty()) {
621 O << Indent << "No successors\n";
622 } else {
623 O << Indent << "Successor(s): ";
624 ListSeparator LS;
625 for (auto *Succ : getSuccessors())
626 O << LS << Succ->getName();
627 O << '\n';
628 }
629}
630
633 O << Indent << getName() << ":\n";
634
635 auto RecipeIndent = Indent + " ";
638 O << '\n';
639 }
640
641 printSuccessors(O, Indent);
642}
643#endif
644
646
647
648
649
650
651
655 bool InRegion = Entry->getParent();
656
659 Old2NewVPBlocks[BB] = NewBB;
660 if (InRegion && BB->getNumSuccessors() == 0) {
661 assert(!Exiting && "Multiple exiting blocks?");
662 Exiting = BB;
663 }
664 }
665 assert((!InRegion || Exiting) && "regions must have a single exiting block");
666
667
669 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
672 NewPreds.push_back(Old2NewVPBlocks[Pred]);
673 }
677 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
678 }
680 }
681
682#if !defined(NDEBUG)
683
684
685 for (const auto &[OldBB, NewBB] :
688 for (const auto &[OldPred, NewPred] :
689 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
690 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
691
692 for (const auto &[OldSucc, NewSucc] :
693 zip(OldBB->successors(), NewBB->successors()))
694 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
695 }
696#endif
697
698 return std::make_pair(Old2NewVPBlocks[Entry],
699 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
700}
701
703 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
704 auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
705 getName(), isReplicator());
707 Block->setParent(NewRegion);
708 return NewRegion;
709}
710
713 RPOT(Entry);
714
715 if (!isReplicator()) {
716
721
722
723
724 if (ParentLoop)
726 else
728
729
732 Block->execute(State);
733 }
734
736 return;
737 }
738
739 assert(!State->Lane && "Replicating a Region with non-null instance.");
740
741
742 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
747
750 Block->execute(State);
751 }
752 }
753
754
755 State->Lane.reset();
756}
757
763}
764
766 if (!isReplicator()) {
774 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
775 << ": vector loop backedge\n");
776 Cost += BackedgeCost;
778 }
779
780
781
782
783
786
787
788
789
791 VPBasicBlock *Then = cast(getEntry()->getSuccessors()[0]);
793
794
795
798
799 return ThenCost;
800}
801
802#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
805 O << Indent << (isReplicator() ? " " : " ") << getName() << ": {";
806 auto NewIndent = Indent + " ";
808 O << '\n';
809 BlockBase->print(O, NewIndent, SlotTracker);
810 }
811 O << Indent << "}\n";
812
813 printSuccessors(O, Indent);
814}
815#endif
816
818 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
819 ScalarHeader = createVPIRBasicBlock(L->getHeader());
820}
821
824
825 for (auto *VPB : CreatedBlocks) {
826 if (auto *VPBB = dyn_cast(VPB)) {
827
828
830 for (auto *Def : R.definedValues())
831 Def->replaceAllUsesWith(&DummyValue);
832
833 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
834 R.setOperand(I, &DummyValue);
835 }
836 }
837 delete VPB;
838 }
839 for (VPValue *VPV : VPLiveInsToFree)
840 delete VPV;
841 if (BackedgeTakenCount)
842 delete BackedgeTakenCount;
843}
844
847 bool RequiresScalarEpilogueCheck,
848 bool TailFolded, Loop *TheLoop) {
849 auto Plan = std::make_unique(TheLoop);
851
852
853
854
855
856
857
860
861
862
863
865 assert(!isa(BackedgeTakenCountSCEV) &&
866 "Invalid loop count");
869 InductionTy, TheLoop);
870 Plan->TripCount =
872
873
874
879 HeaderVPBB, LatchVPBB, "vector loop", false );
880
884
887 if (!RequiresScalarEpilogueCheck) {
890 }
891
892
893
894
895
896
897
898
899
902
905
907
908
909
910
913 TailFolded
918 ScalarLatchTerm->getDebugLoc(), "cmp.n");
920 ScalarLatchTerm->getDebugLoc());
922}
923
927
928 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
930 auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1),
931 "trip.count.minus.1");
932 BackedgeTakenCount->setUnderlyingValue(TCMO);
933 }
934
935 VectorTripCount.setUnderlyingValue(VectorTripCountV);
936
938
939 assert((!getVectorLoopRegion() || VFxUF.getNumUsers()) &&
940 "VFxUF expected to always have users");
941 unsigned UF = getUF();
942 if (VF.getNumUsers()) {
944 VF.setUnderlyingValue(RuntimeVF);
945 VFxUF.setUnderlyingValue(
946 UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
947 : RuntimeVF);
948 } else {
950 }
951}
952
953
954
955
957
960
961
963 cast(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
966
967 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
968 << ", UF=" << getUF() << '\n');
969 setName("Final VPlan");
971
972
973
974
981
982
983
986
988 Entry);
989
990
992 Block->execute(State);
993
995
996 auto *LoopRegion = getVectorLoopRegion();
997 if (!LoopRegion)
998 return;
999
1002
1003
1004
1005 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1007
1008 if (isa(&R))
1009 continue;
1010
1011 if (isa(&R)) {
1013 if (isa(&R)) {
1014 Phi = cast(State->get(R.getVPSingleValue()));
1015 } else {
1016 auto *WidenPhi = cast(&R);
1018 "recipe generating only scalars should have been replaced");
1019 auto *GEP = cast(State->get(WidenPhi));
1020 Phi = cast(GEP->getPointerOperand());
1021 }
1022
1023 Phi->setIncomingBlock(1, VectorLatchBB);
1024
1025
1026
1027 Instruction *Inc = cast(Phi->getIncomingValue(1));
1029
1030
1031 if (auto *IV = dyn_cast(&R))
1032 Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1033 continue;
1034 }
1035
1036 auto *PhiR = cast(&R);
1037 bool NeedsScalar = isa(PhiR) ||
1038 (isa(PhiR) &&
1039 cast(PhiR)->isInLoop());
1040 Value *Phi = State->get(PhiR, NeedsScalar);
1041 Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1042 cast(Phi)->addIncoming(Val, VectorLatchBB);
1043 }
1044}
1045
1047
1048
1049 return getVectorLoopRegion()->cost(VF, Ctx);
1050}
1051
1053
1055 if (auto *R = dyn_cast(B))
1056 return R->isReplicator() ? nullptr : R;
1057 return nullptr;
1058}
1059
1062 if (auto *R = dyn_cast(B))
1063 return R->isReplicator() ? nullptr : R;
1064 return nullptr;
1065}
1066
1067#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1070
1071 if (VF.getNumUsers() > 0) {
1072 O << "\nLive-in ";
1074 O << " = VF";
1075 }
1076
1077 if (VFxUF.getNumUsers() > 0) {
1078 O << "\nLive-in ";
1080 O << " = VF * UF";
1081 }
1082
1083 if (VectorTripCount.getNumUsers() > 0) {
1084 O << "\nLive-in ";
1085 VectorTripCount.printAsOperand(O, SlotTracker);
1086 O << " = vector-trip-count";
1087 }
1088
1089 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1090 O << "\nLive-in ";
1091 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1092 O << " = backedge-taken count";
1093 }
1094
1095 O << "\n";
1096 if (TripCount->isLiveIn())
1097 O << "Live-in ";
1098 TripCount->printAsOperand(O, SlotTracker);
1099 O << " = original trip-count";
1100 O << "\n";
1101}
1102
1106
1107 O << "VPlan '" << getName() << "' {";
1108
1109 printLiveIns(O);
1110
1112 RPOT(getEntry());
1114 O << '\n';
1116 }
1117
1118 O << "}\n";
1119}
1120
1122 std::string Out;
1124 RSO << Name << " for ";
1125 if (!VFs.empty()) {
1126 RSO << "VF={" << VFs[0];
1128 RSO << "," << VF;
1129 RSO << "},";
1130 }
1131
1132 if (UFs.empty()) {
1133 RSO << "UF>=1";
1134 } else {
1135 RSO << "UF={" << UFs[0];
1137 RSO << "," << UF;
1138 RSO << "}";
1139 }
1140
1141 return Out;
1142}
1143
1148}
1149
1152#endif
1153
1156
1157
1158
1160 OldDeepRPOT(Entry);
1162 NewDeepRPOT(NewEntry);
1163
1164
1165 for (const auto &[OldBB, NewBB] :
1166 zip(VPBlockUtils::blocksOnly(OldDeepRPOT),
1167 VPBlockUtils::blocksOnly(NewDeepRPOT))) {
1168 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1169 "blocks must have the same number of recipes");
1170 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1171 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1172 "recipes must have the same number of operands");
1173 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1174 "recipes must define the same number of operands");
1175 for (const auto &[OldV, NewV] :
1176 zip(OldR.definedValues(), NewR.definedValues()))
1177 Old2NewVPValues[OldV] = NewV;
1178 }
1179 }
1180
1181
1183 VPBlockUtils::blocksOnly(NewDeepRPOT)) {
1185 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1186 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1187 NewR.setOperand(I, NewOp);
1188 }
1189 }
1190}
1191
1193 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1194
1195 const auto &[NewEntry, __] = cloneFrom(Entry);
1196
1197 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1200 auto *VPIRBB = dyn_cast(VPB);
1201 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1202 }));
1203
1204 auto *NewPlan = new VPlan(cast(NewEntry), NewScalarHeader);
1206 for (VPValue *OldLiveIn : VPLiveInsToFree) {
1207 Old2NewVPValues[OldLiveIn] =
1208 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1209 }
1210 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1211 Old2NewVPValues[&VF] = &NewPlan->VF;
1212 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1213 if (BackedgeTakenCount) {
1214 NewPlan->BackedgeTakenCount = new VPValue();
1215 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1216 }
1217 assert(TripCount && "trip count must be set");
1218 if (TripCount->isLiveIn())
1219 Old2NewVPValues[TripCount] =
1220 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1221
1222
1223
1224 remapOperands(Entry, NewEntry, Old2NewVPValues);
1225
1226
1227 NewPlan->VFs = VFs;
1228 NewPlan->UFs = UFs;
1229
1230 NewPlan->Name = Name;
1232 "TripCount must have been added to Old2NewVPValues");
1233 NewPlan->TripCount = Old2NewVPValues[TripCount];
1234
1235
1236
1237 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1238 for (unsigned I :
1239 seq(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1240 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1241 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1242
1243 return NewPlan;
1244}
1245
1248 CreatedBlocks.push_back(VPIRBB);
1249 return VPIRBB;
1250}
1251
1253 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1257 return VPIRBB;
1258}
1259
1260#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1261
1263 return (isa(Block) ? "cluster_N" : "N") +
1265}
1266
1268 const std::string &Name = Block->getName();
1269 if (.empty())
1270 return Name;
1271 return "VPB" + Twine(getOrCreateBID(Block));
1272}
1273
1276 bumpIndent(0);
1277 OS << "digraph VPlan {\n";
1278 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1281
1282 {
1283
1284 std::string Str;
1289 for (auto Line : Lines)
1291 }
1292
1293 OS << "\"]\n";
1294 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1295 OS << "edge [fontname=Courier, fontsize=30]\n";
1296 OS << "compound=true\n";
1297
1299 dumpBlock(Block);
1300
1301 OS << "}\n";
1302}
1303
1309 else
1311}
1312
1314 bool Hidden, const Twine &Label) {
1315
1316
1319 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1320 OS << " [ label=\"" << Label << '\"';
1322 OS << " ltail=" << getUID(From);
1323 if (Head != To)
1324 OS << " lhead=" << getUID(To);
1325 if (Hidden)
1326 OS << "; splines=none";
1327 OS << "]\n";
1328}
1329
1331 auto &Successors = Block->getSuccessors();
1332 if (Successors.size() == 1)
1333 drawEdge(Block, Successors.front(), false, "");
1334 else if (Successors.size() == 2) {
1335 drawEdge(Block, Successors.front(), false, "T");
1336 drawEdge(Block, Successors.back(), false, "F");
1337 } else {
1338 unsigned SuccessorNumber = 0;
1339 for (auto *Successor : Successors)
1341 }
1342}
1343
1345
1346
1347 OS << Indent << getUID(BasicBlock) << " [label =\n";
1348 bumpIndent(1);
1349 std::string Str;
1351
1353
1354
1355
1358
1361 };
1362
1363
1365 EmitLine(Line, " +\n");
1366 EmitLine(Lines.back(), "\n");
1367
1368 bumpIndent(-1);
1369 OS << Indent << "]\n";
1370
1372}
1373
1375 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1376 bumpIndent(1);
1377 OS << Indent << "fontname=Courier\n"
1378 << Indent << "label=\""
1381
1384 dumpBlock(Block);
1385 bumpIndent(-1);
1386 OS << Indent << "}\n";
1388}
1389
1391 if (auto *Inst = dyn_cast(V)) {
1392 if (!Inst->getType()->isVoidTy()) {
1393 Inst->printAsOperand(O, false);
1394 O << " = ";
1395 }
1396 O << Inst->getOpcodeName() << " ";
1397 unsigned E = Inst->getNumOperands();
1398 if (E > 0) {
1399 Inst->getOperand(0)->printAsOperand(O, false);
1400 for (unsigned I = 1; I < E; ++I)
1401 Inst->getOperand(I)->printAsOperand(O << ", ", false);
1402 }
1403 } else
1404 V->printAsOperand(O, false);
1405}
1406
1407#endif
1408
1409
1410
1415}
1416
1419}
1421 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1422}
1423
1427
1428
1429
1430 if (this == New)
1431 return;
1432
1433 for (unsigned J = 0; J < getNumUsers();) {
1435 bool RemovedUser = false;
1438 continue;
1439
1440 RemovedUser = true;
1442 }
1443
1444
1445
1446 if (!RemovedUser)
1447 J++;
1448 }
1449}
1450
1451#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1454}
1455
1459 });
1460}
1461#endif
1462
1464 Old2NewTy &Old2New,
1469 visitBlock(Base, Old2New, IAI);
1470 }
1471}
1472
1473void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1477 if (isa(&VPI))
1478 continue;
1479 assert(isa(&VPI) && "Can only handle VPInstructions");
1480 auto *VPInst = cast(&VPI);
1481
1482 auto *Inst = dyn_cast_or_null(VPInst->getUnderlyingValue());
1483 if (!Inst)
1484 continue;
1486 if (!IG)
1487 continue;
1488
1489 auto NewIGIter = Old2New.find(IG);
1490 if (NewIGIter == Old2New.end())
1492 IG->getFactor(), IG->isReverse(), IG->getAlign());
1493
1494 if (Inst == IG->getInsertPos())
1495 Old2New[IG]->setInsertPos(VPInst);
1496
1497 InterleaveGroupMap[VPInst] = Old2New[IG];
1498 InterleaveGroupMap[VPInst]->insertMember(
1499 VPInst, IG->getIndex(Inst),
1500 Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1501 : IG->getFactor()));
1502 }
1504 visitRegion(Region, Old2New, IAI);
1505 else
1507}
1508
1513}
1514
/// Record a printable name for \p V in VPValue2Name. \p V must not have been
/// named before (checked by the assert below).
///
/// Values with neither an underlying IR value nor a named defining recipe get
/// a numbered slot name of the form "vp<%N>". All other values get a name
/// built from the underlying value or the recipe name, with a ".N" suffix
/// appended when the same base name has already been handed out.
1515void VPSlotTracker::assignName(const VPValue *V) {
1516 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1517 auto *UV = V->getUnderlyingValue();
// NOTE(review): the cast's target type is not visible in this listing; VPI is
// only used below for its getName().
1518 auto *VPI = dyn_cast_or_null(V->getDefiningRecipe());
// Nothing to derive a name from: hand out the next free slot number.
1519 if (!UV && !(VPI && !VPI->getName().empty())) {
1520 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1521 NextSlot++;
1522 return;
1523 }
1524
1525
1526
// Build the textual name: the underlying value printed as an operand when
// there is one, otherwise the defining recipe's own name.
1527 std::string Name;
1528 if (UV) {
// NOTE(review): S is declared on a line missing from this listing; it is
// presumably a string stream writing into Name -- confirm against the real
// source.
1530 UV->printAsOperand(S, false);
1531 } else
1532 Name = VPI->getName();
1533
1534 assert(.empty() && "Name cannot be empty.");
// NOTE(review): Prefix is defined on a line missing from this listing.
1536 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1537
1538
// Tentatively register the unversioned name; A refers to the stored entry so
// it can be rewritten below if a version suffix turns out to be needed.
1539 const auto &[A, _] = VPValue2Name.insert({V, BaseName});
1540
1541
// Live-in integer/floating-point constants keep the plain base name and never
// receive a version suffix.
1542 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1543 return;
1544
1545
1546
// The first value using BaseName records version 0 and keeps the bare name.
// Each later value bumps the counter and is renamed to "BaseName.<counter>".
1547 const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0});
1548 if (!UseInserted) {
1549 C->second++;
1550 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1551 }
1552}
1553
/// Assign names to the values of \p Plan: first the plan-level values (VF,
/// VFxUF, the vector trip count and, when present, the backedge-taken count),
/// then each value reached by the LI loop, and finally the values defined in
/// the plan's blocks.
1554void VPSlotTracker::assignNames(const VPlan &Plan) {
1556 assignName(&Plan.VF);
1558 assignName(&Plan.VFxUF);
1559 assignName(&Plan.VectorTripCount);
// BackedgeTakenCount is a pointer and may be unset.
1560 if (Plan.BackedgeTakenCount)
1561 assignName(Plan.BackedgeTakenCount);
// NOTE(review): the loop header introducing LI is missing from this listing;
// presumably it iterates over the plan's live-in values -- confirm.
1563 assignName(LI);
1564
// NOTE(review): the declaration of RPOT and the start of this loop header are
// missing from this listing. Only the blocks selected by blocksOnly(RPOT) are
// handed to the per-block overload.
1568 VPBlockUtils::blocksOnly(RPOT))
1569 assignNames(VPBB);
1570}
1571
/// Assign a name to every value defined by a recipe, via assignName().
/// NOTE(review): the outer loop header that declares Recipe is missing from
/// this listing; presumably it walks the recipes of \p VPBB -- confirm.
1572void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1574 for (VPValue *Def : Recipe.definedValues())
1575 assignName(Def);
1576}
1577
1579 std::string Name = VPValue2Name.lookup(V);
1580 if (.empty())
1581 return Name;
1582
1583
1584
1585
1586
1587
1588
1589
1590 const VPRecipeBase *DefR = V->getDefiningRecipe();
1591 (void)DefR;
1593 "VPValue defined by a recipe in a VPlan?");
1594
1595
1596 if (auto *UV = V->getUnderlyingValue()) {
1597 std::string Name;
1599 UV->printAsOperand(S, false);
1600 return (Twine("ir<") + Name + ">").str();
1601 }
1602
1603 return "";
1604}
1605
1608 assert(.isEmpty() && "Trying to test an empty VF range.");
1609 bool PredicateAtRangeStart = Predicate(Range.Start);
1610
1612 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1613 Range.End = TmpVF;
1614 break;
1615 }
1616
1617 return PredicateAtRangeStart;
1618}
1619
1620
1621
1622
1623
1624
1627 auto MaxVFTimes2 = MaxVF * 2;
1629 VFRange SubRange = {VF, MaxVFTimes2};
1630 auto Plan = buildVPlan(SubRange);
1632 VPlans.push_back(std::move(Plan));
1634 }
1635}
1636
1640 1 &&
1641 "Multiple VPlans for VF.");
1642
1645 return *Plan.get();
1646 }
1648}
1649
1650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1652 if (VPlans.empty()) {
1653 O << "LV: No VPlans built.\n";
1654 return;
1655 }
1656 for (const auto &Plan : VPlans)
1659 else
1661}
1662#endif
1663
1666 if (!V->isLiveIn())
1667 return {};
1668
1670}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
dxil pretty DXIL Metadata Pretty Printer
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static void dumpEdges(CFGMST< Edge, BBInfo > &MST, GCOVFunction &GF)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file provides utility VPlan to VPlan transformations.
static T * getPlanEntry(T *Start)
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
cl::opt< unsigned > ForceTargetInstructionCost
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static const uint32_t IV[8]
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the basic block to an output stream with an optional AssemblyAnnotationWriter.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static ConstantInt * getTrue(LLVMContext &Context)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isScalar() const
Exactly one element.
bool shouldEmitDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
UnreachableInst * CreateUnreachable()
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
InsertPoint saveIP() const
Returns the current insert point.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
static InstructionCost getInvalid(CostType Val=0)
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Drive the analysis of interleaved memory accesses in the loop.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getUniqueLatchExitBlock() const
Return the unique exit block for the latch, or null if there are multiple different exit blocks or th...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
void annotateInstWithNoAlias(Instruction *VersionedInst, const Instruction *OrigInst)
Add the noalias annotations to VersionedInst.
Represents a single loop in the control flow graph.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class provides computation of slot numbers for LLVM Assembly writing.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt1Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect this VPBB's predecessors in the VPlan CFG to the IR basic block generated for this VPBB.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
iterator begin()
Recipe iterator methods.
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
bool isExiting() const
Returns true if the block is exiting its parent region.
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as a successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
iterator_range< VPBlockBase ** > successors()
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as a predecessor of this VPBlockBase.
VPBlockBase * getEnclosingBlockWithPredecessors()
const VPBlocksTy & getPredecessors() const
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
const VPBlocksTy & getHierarchicalSuccessors()
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
const VPBasicBlock * getEntryBasicBlock() const
Helper for GraphTraits specialization that traverses through VPRegionBlocks.
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
VPlan-based builder utility analogous to IRBuilder.
This class augments a recipe with a set of VPValues defined by the recipe.
void dump() const
Dump the VPDef to stderr (for debugging).
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Recipe to expand a SCEV expression.
A special type of VPBasicBlock that wraps an existing IR basic block.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
This is a concrete Recipe that models a single VPlan-level instruction.
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ty>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
void dump() const
Dump the value to stderr (for debugging).
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
VPDef * Def
Pointer to the VPDef that defines this VPValue.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPlanPrinter prints a given VPlan to a given output stream.
LLVM_DUMP_METHOD void dump()
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
VPBasicBlock * getEntry()
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
VPValue & getVectorTripCount()
The vector trip count.
VPValue * getTripCount() const
The trip count of the original loop.
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
bool hasVF(ElementCount VF)
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
void print(raw_ostream &O) const
Print this VPlan to O.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
std::string EscapeString(const std::string &Label)
bool match(Val *V, const Pattern &P)
BinaryVPInstruction_match< Op0_t, Op1_t, VPInstruction::BranchOnCount > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
UnaryVPInstruction_match< Op0_t, VPInstruction::BranchOnCond > m_BranchOnCond(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto successors(const MachineBasicBlock *BB)
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
cl::opt< bool > EnableFSDiscriminator
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
std::unique_ptr< VPlan > VPlanPtr
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
DomTreeUpdater DTU
Updater for the DominatorTree.
DenseMap< VPValue *, Value * > VPV2Vector
DenseMap< VPValue *, SmallVector< Value *, 4 > > VPV2Scalars
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
bool hasScalarValue(VPValue *Def, VPLane Lane)
bool hasVectorValue(VPValue *Def)
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane)
Construct the vector value of a scalarized value V one lane at a time.
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
VPlan * Plan
Pointer to the VPlan code is generated for.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
void print(raw_ostream &O) const
static void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...