LLVM: lib/Transforms/Vectorize/LoopIdiomVectorize.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
77
78using namespace llvm;
80
81#define DEBUG_TYPE "loop-idiom-vectorize"
82
85 cl::desc("Disable Loop Idiom Vectorize Pass."));
86
89 cl::desc("The vectorization style for loop idiom transform."),
91 "Use masked vector intrinsics"),
93 "predicated", "Use VP intrinsics")),
95
99 cl::desc("Proceed with Loop Idiom Vectorize Pass, but do "
100 "not convert byte-compare loop(s)."));
101
104 cl::desc("The vectorization factor for byte-compare patterns."),
106
110 cl::desc("Do not convert find-first-byte loop(s)."));
111
114 cl::desc("Verify loops generated Loop Idiom Vectorize Pass."));
115
116namespace {
117class LoopIdiomVectorize {
119 unsigned ByteCompareVF;
120 Loop *CurLoop = nullptr;
125
126
128 BasicBlock *VectorLoopPreheaderBlock = nullptr;
129 BasicBlock *VectorLoopStartBlock = nullptr;
130 BasicBlock *VectorLoopMismatchBlock = nullptr;
131 BasicBlock *VectorLoopIncBlock = nullptr;
132
133public:
137 : VectorizeStyle(S), ByteCompareVF(VF), DT(DT), LI(LI), TTI(TTI), DL(DL) {
138 }
139
140 bool run(Loop *L);
141
142private:
143
144
145
146 bool runOnCountableLoop();
147 bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
148 SmallVectorImpl<BasicBlock *> &ExitBlocks);
149
150 bool recognizeByteCompare();
151
152 Value *expandFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,
153 GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
154 Instruction *Index, Value *Start, Value *MaxLen);
155
156 Value *createMaskedFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,
157 GetElementPtrInst *GEPA,
158 GetElementPtrInst *GEPB, Value *ExtStart,
160 Value *createPredicatedFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,
161 GetElementPtrInst *GEPA,
162 GetElementPtrInst *GEPB, Value *ExtStart,
164
165 void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
166 PHINode *IndPhi, Value *MaxLen, Instruction *Index,
167 Value *Start, bool IncIdx, BasicBlock *FoundBB,
168 BasicBlock *EndBB);
169
170 bool recognizeFindFirstByte();
171
172 Value *expandFindFirstByte(IRBuilder<> &Builder, DomTreeUpdater &DTU,
173 unsigned VF, Type *CharTy, Value *IndPhi,
174 BasicBlock *ExitSucc, BasicBlock *ExitFail,
175 Value *SearchStart, Value *SearchEnd,
176 Value *NeedleStart, Value *NeedleEnd);
177
178 void transformFindFirstByte(PHINode *IndPhi, unsigned VF, Type *CharTy,
179 BasicBlock *ExitSucc, BasicBlock *ExitFail,
180 Value *SearchStart, Value *SearchEnd,
181 Value *NeedleStart, Value *NeedleEnd);
182
183};
184}
185
191
192 const auto *DL = &L.getHeader()->getDataLayout();
193
197
198 unsigned BCVF = ByteCompareVF;
199 if (ByteCmpVF.getNumOccurrences())
201
202 LoopIdiomVectorize LIV(VecStyle, BCVF, &AR.DT, &AR.LI, &AR.TTI, DL);
203 if (!LIV.run(&L))
205
207}
208
209
210
211
212
213
214
215bool LoopIdiomVectorize::run(Loop *L) {
216 CurLoop = L;
217
218 Function &F = *L->getHeader()->getParent();
220 return false;
221
222 if (F.hasFnAttribute(Attribute::NoImplicitFloat)) {
224 << " due to its NoImplicitFloat attribute");
225 return false;
226 }
227
228
229
230 if (!L->getLoopPreheader())
231 return false;
232
234 << CurLoop->getHeader()->getName() << "\n");
235
236 if (recognizeByteCompare())
237 return true;
238
239 if (recognizeFindFirstByte())
240 return true;
241
242 return false;
243}
244
248
249
250 bool ResPhi = false;
251 for (Value *Op : PN.incoming_values())
252 if (Op == ScalarRes) {
253 ResPhi = true;
254 break;
255 }
256
257
258
259 if (ResPhi)
260 PN.addIncoming(VectorRes, IncBB);
261 else {
262
263
264
265
266
268 if (L->contains(BB)) {
269 PN.addIncoming(PN.getIncomingValueForBlock(BB), IncBB);
270 break;
271 }
272 }
273 }
274}
275
276bool LoopIdiomVectorize::recognizeByteCompare() {
277
278
279
280
281
282
283 if (->supportsScalableVectors() ||
->getMinPageSize().has_value() ||
285 return false;
286
287 BasicBlock *Header = CurLoop->getHeader();
288
289
290
291 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 2)
292 return false;
293
296 return false;
297
298 auto LoopBlocks = CurLoop->getBlocks();
299
300
301
302
303
304
305
306
307 if (LoopBlocks[0]->sizeWithoutDebug() > 4)
308 return false;
309
310
311
312
313
314
315
316
317
318
319
320
321 if (LoopBlocks[1]->sizeWithoutDebug() > 7)
322 return false;
323
324
325 Value *StartIdx = nullptr;
330 } else {
333 }
334
335
336 if (!Index || ->getType()->isIntegerTy(32) ||
338 return false;
339
340
341
342
345 if (&I != PN && &I != Index)
348 return false;
349
350
353 if ((Header->getTerminator(),
357 !CurLoop->contains(WhileBB))
358 return false;
359
360
361
364 Value *LoadA, *LoadB;
369 !CurLoop->contains(TrueBB))
370 return false;
371
374 return false;
375
379 return false;
380
383
384 if (!GEPA || !GEPB)
385 return false;
386
389
390
391 if (!CurLoop->isLoopInvariant(PtrA) || !CurLoop->isLoopInvariant(PtrB) ||
396 return false;
397
398
400 return false;
401
405 return false;
406
407
408
410 return false;
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430 if (FoundBB == EndBB) {
432 Value *WhileCondVal = EndPN.getIncomingValueForBlock(Header);
433 Value *WhileBodyVal = EndPN.getIncomingValueForBlock(WhileBB);
434
435
436
437
438
439 if (WhileCondVal != WhileBodyVal &&
440 ((WhileCondVal != Index && WhileCondVal != MaxLen) ||
441 (WhileBodyVal != Index)))
442 return false;
443 }
444 }
445
447 << *(EndBB->getParent()) << "\n\n");
448
449
450
451 transformByteCompare(GEPA, GEPB, PN, MaxLen, Index, StartIdx, true,
452 FoundBB, EndBB);
453 return true;
454}
455
456Value *LoopIdiomVectorize::createMaskedFindMismatch(
464
467
469 Intrinsic::get_active_lane_mask, {PredVTy, I64Type}, {ExtStart, ExtEnd});
470
472 VecLen =
473 Builder.CreateMul(VecLen, ConstantInt::get(I64Type, ByteCompareVF), "",
474 true, true);
475
478
480 Builder.Insert(JumpToVectorLoop);
481
483 VectorLoopStartBlock}});
484
485
486
488 PHINode *LoopPred = Builder.CreatePHI(PredVTy, 2, "mismatch_vec_loop_pred");
489 LoopPred->addIncoming(InitialPred, VectorLoopPreheaderBlock);
490 PHINode *VectorIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_vec_index");
491 VectorIndexPhi->addIncoming(ExtStart, VectorLoopPreheaderBlock);
492 Type *VectorLoadType =
495
496 Value *VectorLhsGep =
497 Builder.CreateGEP(LoadType, PtrA, VectorIndexPhi, "", GEPA->isInBounds());
499 Align(1), LoopPred, Passthru);
500
501 Value *VectorRhsGep =
502 Builder.CreateGEP(LoadType, PtrB, VectorIndexPhi, "", GEPB->isInBounds());
504 Align(1), LoopPred, Passthru);
505
506 Value *VectorMatchCmp = Builder.CreateICmpNE(VectorLhsLoad, VectorRhsLoad);
507 VectorMatchCmp = Builder.CreateSelect(LoopPred, VectorMatchCmp, PFalse);
508 Value *VectorMatchHasActiveLanes = Builder.CreateOrReduce(VectorMatchCmp);
510 VectorLoopMismatchBlock, VectorLoopIncBlock, VectorMatchHasActiveLanes);
511 Builder.Insert(VectorEarlyExit);
512
516
517
518
519
521 Value *NewVectorIndexPhi =
522 Builder.CreateAdd(VectorIndexPhi, VecLen, "",
523 true, true);
524 VectorIndexPhi->addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);
527 {PredVTy, I64Type}, {NewVectorIndexPhi, ExtEnd});
528 LoopPred->addIncoming(NewPred, VectorLoopIncBlock);
529
530 Value *PredHasActiveLanes =
533 BranchInst::Create(VectorLoopStartBlock, EndBlock, PredHasActiveLanes);
534 Builder.Insert(VectorLoopBranchBack);
535
539
540
541
543 PHINode *FoundPred = Builder.CreatePHI(PredVTy, 1, "mismatch_vec_found_pred");
544 FoundPred->addIncoming(VectorMatchCmp, VectorLoopStartBlock);
546 Builder.CreatePHI(PredVTy, 1, "mismatch_vec_last_loop_pred");
547 LastLoopPred->addIncoming(LoopPred, VectorLoopStartBlock);
548 PHINode *VectorFoundIndex =
549 Builder.CreatePHI(I64Type, 1, "mismatch_vec_found_index");
550 VectorFoundIndex->addIncoming(VectorIndexPhi, VectorLoopStartBlock);
551
552 Value *PredMatchCmp = Builder.CreateAnd(LastLoopPred, FoundPred);
554 Ctz = Builder.CreateZExt(Ctz, I64Type);
555 Value *VectorLoopRes64 = Builder.CreateAdd(VectorFoundIndex, Ctz, "",
556 true, true);
557 return Builder.CreateTrunc(VectorLoopRes64, ResType);
558}
559
560Value *LoopIdiomVectorize::createPredicatedFindMismatch(
565 Type *ResType = I32Type;
569
571 Builder.Insert(JumpToVectorLoop);
572
574 VectorLoopStartBlock}});
575
576
577
579 auto *VectorIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_vector_index");
580 VectorIndexPhi->addIncoming(ExtStart, VectorLoopPreheaderBlock);
581
582
583 Value *AVL = Builder.CreateSub(ExtEnd, VectorIndexPhi, "avl", true,
584 true);
585
587 auto *VF = ConstantInt::get(I32Type, ByteCompareVF);
588
589 Value *VL = Builder.CreateIntrinsic(Intrinsic::experimental_get_vector_length,
590 {I64Type}, {AVL, VF, Builder.getTrue()});
591 Value *GepOffset = VectorIndexPhi;
592
593 Value *VectorLhsGep =
599 Intrinsic::vp_load, {VectorLoadType, VectorLhsGep->getType()},
600 {VectorLhsGep, AllTrueMask, VL}, nullptr, "lhs.load");
601
602 Value *VectorRhsGep =
605 Intrinsic::vp_load, {VectorLoadType, VectorLhsGep->getType()},
606 {VectorRhsGep, AllTrueMask, VL}, nullptr, "rhs.load");
607
608 Value *VectorMatchCmp =
609 Builder.CreateICmpNE(VectorLhsLoad, VectorRhsLoad, "mismatch.cmp");
611 Intrinsic::vp_cttz_elts, {ResType, VectorMatchCmp->getType()},
612 {VectorMatchCmp, Builder.getInt1(false), AllTrueMask,
613 VL});
616 VectorLoopIncBlock, MismatchFound);
617 Builder.Insert(VectorEarlyExit);
618
622
623
624
625
628 Value *NewVectorIndexPhi =
629 Builder.CreateAdd(VectorIndexPhi, VL64, "",
630 true, true);
631 VectorIndexPhi->addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);
632 Value *ExitCond = Builder.CreateICmpNE(NewVectorIndexPhi, ExtEnd);
633 auto *VectorLoopBranchBack =
635 Builder.Insert(VectorLoopBranchBack);
636
640
641
642
644
645
646 auto *CTZLCSSAPhi = Builder.CreatePHI(CTZ->getType(), 1, "ctz");
647 CTZLCSSAPhi->addIncoming(CTZ, VectorLoopStartBlock);
648 auto *VectorIndexLCSSAPhi =
649 Builder.CreatePHI(VectorIndexPhi->getType(), 1, "mismatch_vector_index");
650 VectorIndexLCSSAPhi->addIncoming(VectorIndexPhi, VectorLoopStartBlock);
651
652 Value *CTZI64 = Builder.CreateZExt(CTZLCSSAPhi, I64Type);
653 Value *VectorLoopRes64 = Builder.CreateAdd(VectorIndexLCSSAPhi, CTZI64, "",
654 true, true);
655 return Builder.CreateTrunc(VectorLoopRes64, ResType);
656}
657
658Value *LoopIdiomVectorize::expandFindMismatch(
663
664
665 BasicBlock *Preheader = CurLoop->getLoopPreheader();
670
671
672 EndBlock = SplitBlock(Preheader, PHBranch, DT, LI, nullptr, "mismatch_end");
673
674
675
676
677
678
679
680
681
682
683
684
685
686
688 Ctx, "mismatch_min_it_check", EndBlock->getParent(), EndBlock);
689
690
692
694 Ctx, "mismatch_mem_check", EndBlock->getParent(), EndBlock);
695
697 Ctx, "mismatch_vec_loop_preheader", EndBlock->getParent(), EndBlock);
698
700 EndBlock->getParent(), EndBlock);
701
702 VectorLoopIncBlock = BasicBlock::Create(Ctx, "mismatch_vec_loop_inc",
703 EndBlock->getParent(), EndBlock);
704
705 VectorLoopMismatchBlock = BasicBlock::Create(Ctx, "mismatch_vec_loop_found",
706 EndBlock->getParent(), EndBlock);
707
709 Ctx, "mismatch_loop_pre", EndBlock->getParent(), EndBlock);
710
712 BasicBlock::Create(Ctx, "mismatch_loop", EndBlock->getParent(), EndBlock);
713
715 Ctx, "mismatch_loop_inc", EndBlock->getParent(), EndBlock);
716
719
720
721 auto VectorLoop = LI->AllocateLoop();
722 auto ScalarLoop = LI->AllocateLoop();
723
724 if (CurLoop->getParentLoop()) {
725 CurLoop->getParentLoop()->addBasicBlockToLoop(MinItCheckBlock, *LI);
726 CurLoop->getParentLoop()->addBasicBlockToLoop(MemCheckBlock, *LI);
727 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopPreheaderBlock,
728 *LI);
729 CurLoop->getParentLoop()->addChildLoop(VectorLoop);
730 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopMismatchBlock, *LI);
731 CurLoop->getParentLoop()->addBasicBlockToLoop(LoopPreHeaderBlock, *LI);
732 CurLoop->getParentLoop()->addChildLoop(ScalarLoop);
733 } else {
734 LI->addTopLevelLoop(VectorLoop);
735 LI->addTopLevelLoop(ScalarLoop);
736 }
737
738
739 VectorLoop->addBasicBlockToLoop(VectorLoopStartBlock, *LI);
740 VectorLoop->addBasicBlockToLoop(VectorLoopIncBlock, *LI);
741
742 ScalarLoop->addBasicBlockToLoop(LoopStartBlock, *LI);
743 ScalarLoop->addBasicBlockToLoop(LoopIncBlock, *LI);
744
745
747
748
752
753
758 LLVMContext::MD_prof,
760 Builder.Insert(MinItCheckBr);
761
765
766
767
769
770
771
772
773
774
775
776
777
778
779
780
781
782 Value *LhsStartGEP = Builder.CreateGEP(LoadType, PtrA, ExtStart);
783 Value *RhsStartGEP = Builder.CreateGEP(LoadType, PtrB, ExtStart);
786 Value *LhsEndGEP = Builder.CreateGEP(LoadType, PtrA, ExtEnd);
787 Value *RhsEndGEP = Builder.CreateGEP(LoadType, PtrB, ExtEnd);
790
791 const uint64_t MinPageSize = TTI->getMinPageSize().value();
793 Value *LhsStartPage = Builder.CreateLShr(LhsStart, AddrShiftAmt);
794 Value *LhsEndPage = Builder.CreateLShr(LhsEnd, AddrShiftAmt);
795 Value *RhsStartPage = Builder.CreateLShr(RhsStart, AddrShiftAmt);
796 Value *RhsEndPage = Builder.CreateLShr(RhsEnd, AddrShiftAmt);
797 Value *LhsPageCmp = Builder.CreateICmpNE(LhsStartPage, LhsEndPage);
798 Value *RhsPageCmp = Builder.CreateICmpNE(RhsStartPage, RhsEndPage);
799
800 Value *CombinedPageCmp = Builder.CreateOr(LhsPageCmp, RhsPageCmp);
802 LoopPreHeaderBlock, VectorLoopPreheaderBlock, CombinedPageCmp);
806 Builder.Insert(CombinedPageCmpCmpBr);
807
811
812
813
814
816
817
818
819
820
821
822 Value *VectorLoopRes = nullptr;
823 switch (VectorizeStyle) {
825 VectorLoopRes =
826 createMaskedFindMismatch(Builder, DTU, GEPA, GEPB, ExtStart, ExtEnd);
827 break;
829 VectorLoopRes = createPredicatedFindMismatch(Builder, DTU, GEPA, GEPB,
830 ExtStart, ExtEnd);
831 break;
832 }
833
835
838
839
842
845
847 PHINode *IndexPhi = Builder.CreatePHI(ResType, 2, "mismatch_index");
848 IndexPhi->addIncoming(Start, LoopPreHeaderBlock);
849
850
851
852 Value *GepOffset = Builder.CreateZExt(IndexPhi, I64Type);
853
857
861
863
865 Builder.Insert(MatchCmpBr);
866
869
870
872 Value *PhiInc = Builder.CreateAdd(IndexPhi, ConstantInt::get(ResType, 1), "",
873 Index->hasNoUnsignedWrap(),
874 Index->hasNoSignedWrap());
875 IndexPhi->addIncoming(PhiInc, LoopIncBlock);
878 Builder.Insert(IVCmpBr);
879
882
883
884
885
886
887
888
889
890 Builder.SetInsertPoint(EndBlock, EndBlock->getFirstInsertionPt());
891 PHINode *ResPhi = Builder.CreatePHI(ResType, 4, "mismatch_result");
892 ResPhi->addIncoming(MaxLen, LoopIncBlock);
893 ResPhi->addIncoming(IndexPhi, LoopStartBlock);
894 ResPhi->addIncoming(MaxLen, VectorLoopIncBlock);
895 ResPhi->addIncoming(VectorLoopRes, VectorLoopMismatchBlock);
896
898
900 ScalarLoop->verifyLoop();
901 VectorLoop->verifyLoop();
902 if (!VectorLoop->isRecursivelyLCSSAForm(*DT, *LI))
904 if (!ScalarLoop->isRecursivelyLCSSAForm(*DT, *LI))
906 }
907
908 return FinalRes;
909}
910
911void LoopIdiomVectorize::transformByteCompare(GetElementPtrInst *GEPA,
917
918
919 BasicBlock *Preheader = CurLoop->getLoopPreheader();
920 BasicBlock *Header = CurLoop->getHeader();
923 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
925
926
927 if (IncIdx)
929
930 Value *ByteCmpRes =
931 expandFindMismatch(Builder, DTU, GEPA, GEPB, Index, Start, MaxLen);
932
933
934
935 assert(IndPhi->hasOneUse() && "Index phi node has more than one use!");
936 Index->replaceAllUsesWith(ByteCmpRes);
937
939 "Expected preheader to terminate with an unconditional branch.");
940
941
942
945 CmpBB->moveBefore(EndBB);
946
947
948
951
954
955
956
958 if (FoundBB != EndBB) {
960 Builder.CreateCondBr(FoundCmp, EndBB, FoundBB);
963
964 } else {
967 }
968
969
970 fixSuccessorPhis(CurLoop, ByteCmpRes, ByteCmpRes, EndBB, CmpBB);
971 if (EndBB != FoundBB)
972 fixSuccessorPhis(CurLoop, ByteCmpRes, ByteCmpRes, FoundBB, CmpBB);
973
974
975
976 if (!CurLoop->isOutermost())
977 CurLoop->getParentLoop()->addBasicBlockToLoop(CmpBB, *LI);
978
979 if (VerifyLoops && CurLoop->getParentLoop()) {
980 CurLoop->getParentLoop()->verifyLoop();
981 if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))
983 }
984}
985
986bool LoopIdiomVectorize::recognizeFindFirstByte() {
987
988
989
990
991 if (->supportsScalableVectors() ||
->getMinPageSize().has_value() ||
993 return false;
994
995
996 BasicBlock *Header = CurLoop->getHeader();
998
999
1000
1001
1002 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 4 ||
1003 CurLoop->getSubLoops().size() != 1)
1004 return false;
1005
1006 auto *InnerLoop = CurLoop->getSubLoops().front();
1009 return false;
1010
1011
1012 auto LoopBlocks = CurLoop->getBlocks();
1013 if (LoopBlocks[0]->sizeWithoutDebug() > 3 ||
1014 LoopBlocks[1]->sizeWithoutDebug() > 4 ||
1015 LoopBlocks[2]->sizeWithoutDebug() > 3 ||
1016 LoopBlocks[3]->sizeWithoutDebug() > 3)
1017 return false;
1018
1019
1022 if (&I != IndPhi)
1025 return false;
1026
1027
1028
1029
1030
1031
1032
1033
1036 !InnerLoop->contains(MatchBB))
1037 return false;
1038
1039
1040
1041
1042
1043
1044
1045
1046
1048 Value *LoadSearch, *LoadNeedle;
1053 MatchPred != ICmpInst::ICMP_EQ || !InnerLoop->contains(InnerBB))
1054 return false;
1055
1056
1060 if (!PN || PN->getParent() != ExitSucc)
1061 return false;
1062 }
1063
1064
1065 Value *Search, *Needle;
1070 return false;
1071
1072
1075 return false;
1076
1077
1078
1079
1085 Args);
1087 return false;
1088
1089
1094 return false;
1095
1096
1097
1098 if (InnerLoop->contains(PSearch))
1100 if (PSearch != &Header->front() || PNeedle != &MatchBB->front())
1101 return false;
1102
1103
1107 std::swap(SearchStart, SearchIndex);
1108
1112 std::swap(NeedleStart, NeedleIndex);
1113
1114
1117 return false;
1118
1119
1124 return false;
1125
1126
1127
1128
1129
1130
1131
1133 Value *NeedleEnd;
1138 !CurLoop->contains(OuterBB))
1139 return false;
1140
1141
1142
1143
1144
1145
1146
1148 Value *SearchEnd;
1153 return false;
1154
1155 if (!CurLoop->isLoopInvariant(SearchStart) ||
1156 !CurLoop->isLoopInvariant(SearchEnd) ||
1157 !CurLoop->isLoopInvariant(NeedleStart) ||
1158 !CurLoop->isLoopInvariant(NeedleEnd))
1159 return false;
1160
1161 LLVM_DEBUG(dbgs() << "Found idiom in loop: \n" << *CurLoop << "\n\n");
1162
1163 transformFindFirstByte(IndPhi, VF, CharTy, ExitSucc, ExitFail, SearchStart,
1164 SearchEnd, NeedleStart, NeedleEnd);
1165 return true;
1166}
1167
1168Value *LoopIdiomVectorize::expandFindFirstByte(
1171 Value *SearchStart, Value *SearchEnd, Value *NeedleStart,
1172 Value *NeedleEnd) {
1173
1174 auto *PtrTy = Builder.getPtrTy();
1178 auto *ConstVF = ConstantInt::get(I64Ty, VF);
1179
1180
1181 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1184
1185
1186
1188 nullptr, "scalar_preheader");
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1221
1222
1223 auto OuterLoop = LI->AllocateLoop();
1224 auto InnerLoop = LI->AllocateLoop();
1225
1226 if (auto ParentLoop = CurLoop->getParentLoop()) {
1227 ParentLoop->addBasicBlockToLoop(BB0, *LI);
1228 ParentLoop->addChildLoop(OuterLoop);
1229 ParentLoop->addBasicBlockToLoop(BB3, *LI);
1230 } else {
1231 LI->addTopLevelLoop(OuterLoop);
1232 }
1233
1234
1235 OuterLoop->addChildLoop(InnerLoop);
1236
1237
1238 OuterLoop->addBasicBlockToLoop(BB1, *LI);
1239 OuterLoop->addBasicBlockToLoop(BB5, *LI);
1240 InnerLoop->addBasicBlockToLoop(BB2, *LI);
1241 InnerLoop->addBasicBlockToLoop(BB4, *LI);
1242
1243
1247
1248
1249
1250
1252 Value *ISearchStart =
1253 Builder.CreatePtrToInt(SearchStart, I64Ty, "search_start_int");
1254 Value *ISearchEnd =
1255 Builder.CreatePtrToInt(SearchEnd, I64Ty, "search_end_int");
1256 Value *INeedleStart =
1257 Builder.CreatePtrToInt(NeedleStart, I64Ty, "needle_start_int");
1258 Value *INeedleEnd =
1259 Builder.CreatePtrToInt(NeedleEnd, I64Ty, "needle_end_int");
1261 Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
1262 {ConstantInt::get(I64Ty, 0), ConstVF});
1263
1264 const uint64_t MinPageSize = TTI->getMinPageSize().value();
1266 Value *SearchStartPage =
1267 Builder.CreateLShr(ISearchStart, AddrShiftAmt, "search_start_page");
1268 Value *SearchEndPage =
1269 Builder.CreateLShr(ISearchEnd, AddrShiftAmt, "search_end_page");
1270 Value *NeedleStartPage =
1271 Builder.CreateLShr(INeedleStart, AddrShiftAmt, "needle_start_page");
1272 Value *NeedleEndPage =
1273 Builder.CreateLShr(INeedleEnd, AddrShiftAmt, "needle_end_page");
1274 Value *SearchPageCmp =
1275 Builder.CreateICmpNE(SearchStartPage, SearchEndPage, "search_page_cmp");
1276 Value *NeedlePageCmp =
1277 Builder.CreateICmpNE(NeedleStartPage, NeedleEndPage, "needle_page_cmp");
1278
1279 Value *CombinedPageCmp =
1280 Builder.CreateOr(SearchPageCmp, NeedlePageCmp, "combined_page_cmp");
1282 CombinedPageBr->setMetadata(LLVMContext::MD_prof,
1286
1287
1291 Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
1292 {Builder.CreatePtrToInt(Search, I64Ty), ISearchEnd}, nullptr,
1293 "search_pred");
1294 PredSearch = Builder.CreateAnd(PredVF, PredSearch, "search_masked");
1296 CharVTy, Search, Align(1), PredSearch, Passthru, "search_load_vec");
1299
1300
1303
1304
1306 Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
1307 {Builder.CreatePtrToInt(Needle, I64Ty), INeedleEnd}, nullptr,
1308 "needle_pred");
1309 PredNeedle = Builder.CreateAnd(PredVF, PredNeedle, "needle_masked");
1311 CharVTy, Needle, Align(1), PredNeedle, Passthru, "needle_load_vec");
1312
1313
1314 Value *Needle0 =
1317 Needle0, "needle0");
1318 LoadNeedle = Builder.CreateSelect(PredNeedle, LoadNeedle, Needle0Splat,
1319 "needle_splat");
1322
1323
1325 Intrinsic::experimental_vector_match, {CharVTy, LoadNeedle->getType()},
1326 {LoadSearch, LoadNeedle, PredSearch}, nullptr, "match_pred");
1331
1332
1334 PHINode *MatchLCSSA = Builder.CreatePHI(PtrTy, 1, "match_start");
1335 PHINode *MatchPredLCSSA =
1338 Intrinsic::experimental_cttz_elts, {I64Ty, MatchPred->getType()},
1339 {MatchPredLCSSA, Builder.getInt1(true)}, nullptr,
1340 "match_idx");
1341 Value *MatchVal =
1342 Builder.CreateGEP(CharTy, MatchLCSSA, MatchCnt, "match_res");
1345
1346
1348 Value *NextNeedle =
1349 Builder.CreateGEP(CharTy, Needle, ConstVF, "needle_next_vec");
1353
1354
1356 Value *NextSearch =
1357 Builder.CreateGEP(CharTy, Search, ConstVF, "search_next_vec");
1359 ExitFail);
1362
1363
1368
1370 MatchPredLCSSA->addIncoming(MatchPred, BB2);
1371
1372
1373
1375 if (ExitSucc != ExitFail)
1377
1379 OuterLoop->verifyLoop();
1380 InnerLoop->verifyLoop();
1381 if (!OuterLoop->isRecursivelyLCSSAForm(*DT, *LI))
1383 }
1384
1385 return MatchVal;
1386}
1387
1388void LoopIdiomVectorize::transformFindFirstByte(
1391 Value *NeedleStart, Value *NeedleEnd) {
1392
1393 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1396 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
1398
1399 expandFindFirstByte(Builder, DTU, VF, CharTy, IndPhi, ExitSucc, ExitFail,
1400 SearchStart, SearchEnd, NeedleStart, NeedleEnd);
1401
1403 "Expected preheader to terminate with an unconditional branch.");
1404
1405 if (VerifyLoops && CurLoop->getParentLoop()) {
1406 CurLoop->getParentLoop()->verifyLoop();
1407 if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))
1409 }
1410}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static cl::opt< bool > VerifyLoops("loop-idiom-vectorize-verify", cl::Hidden, cl::init(false), cl::desc("Verify loops generated Loop Idiom Vectorize Pass."))
static cl::opt< bool > DisableAll("disable-loop-idiom-vectorize-all", cl::Hidden, cl::init(false), cl::desc("Disable Loop Idiom Vectorize Pass."))
static void fixSuccessorPhis(Loop *L, Value *ScalarRes, Value *VectorRes, BasicBlock *SuccBB, BasicBlock *IncBB)
Definition LoopIdiomVectorize.cpp:245
static cl::opt< LoopIdiomVectorizeStyle > LITVecStyle("loop-idiom-vectorize-style", cl::Hidden, cl::desc("The vectorization style for loop idiom transform."), cl::values(clEnumValN(LoopIdiomVectorizeStyle::Masked, "masked", "Use masked vector intrinsics"), clEnumValN(LoopIdiomVectorizeStyle::Predicated, "predicated", "Use VP intrinsics")), cl::init(LoopIdiomVectorizeStyle::Masked))
static cl::opt< bool > DisableFindFirstByte("disable-loop-idiom-vectorize-find-first-byte", cl::Hidden, cl::init(false), cl::desc("Do not convert find-first-byte loop(s)."))
static cl::opt< unsigned > ByteCmpVF("loop-idiom-vectorize-bytecmp-vf", cl::Hidden, cl::desc("The vectorization factor for byte-compare patterns."), cl::init(16))
static cl::opt< bool > DisableByteCmp("disable-loop-idiom-vectorize-bytecmp", cl::Hidden, cl::init(false), cl::desc("Proceed with Loop Idiom Vectorize Pass, but do " "not convert byte-compare loop(s)."))
static bool isSimple(Instruction *I)
static cl::opt< unsigned > MinPageSize("min-page-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target's minimum page size."))
This pass exposes codegen information to IR-level passes.
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Instruction & front() const
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
bool isUnconditional() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Value * getPointerOperand()
Type * getResultElementType() const
unsigned getNumIndices() const
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreateVScale(Type *Ty, const Twine &Name="")
Create a call to llvm.vscale.().
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition LoopIdiomVectorize.cpp:186
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Class to represent scalable SIMD vectors.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
const ParentTy * getParent() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
br_match m_UnconditionalBr(BasicBlock *&Succ)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
auto m_GEP(const OperandTypes &...Ops)
Matches GetElementPtrInst.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI