LLVM: lib/Transforms/Scalar/LoopIdiomRecognize.cpp Source File
#include
#include
#include
#include
#include

using namespace llvm;

#define DEBUG_TYPE "loop-idiom"

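// Overview (illustrative, simplified): this pass looks for simple loops whose
// body is equivalent to a well-known "idiom" and replaces the whole loop with
// an intrinsic or library call.  For example, a loop of the rough form
//
//   for (unsigned i = 0; i != n; ++i)
//     p[i] = 0;
//
// can be collapsed into a single call to memset(p, 0, n); similar loops are
// turned into memcpy/memmove, llvm.ctpop, llvm.ctlz/llvm.cttz, or the
// shift-until-bittest / shift-until-zero rewrites implemented below.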
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(
    NumShiftUntilBitTest,
    "Number of uncountable loops recognized as 'shift until bittest' idiom");
STATISTIC(NumShiftUntilZero,
          "Number of uncountable loops recognized as 'shift until zero' idiom");

static cl::opt<bool, true> DisableLIRPAll(
    "disable-" DEBUG_TYPE "-all",
    cl::desc("Options to disable Loop Idiom Recognize Pass."),
    cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden);

static cl::opt<bool, true> DisableLIRPMemset(
    "disable-" DEBUG_TYPE "-memset",
    cl::desc("Proceed with loop idiom recognize pass, but do "
             "not convert loop(s) to memset."),
    cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden);

static cl::opt<bool, true> DisableLIRPMemcpy(
    "disable-" DEBUG_TYPE "-memcpy",
    cl::desc("Proceed with loop idiom recognize pass, but do "
             "not convert loop(s) to memcpy."),
    cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden);

static cl::opt<bool> UseLIRCodeSizeHeurs(
    "use-lir-code-size-heurs",
    cl::desc("Use loop idiom recognition code size heuristics when compiling "
             "with -Os/-Oz"),
    cl::init(true), cl::Hidden);

namespace {

class LoopIdiomRecognize {
  Loop *CurLoop = nullptr;
  AliasAnalysis *AA;
  DominatorTree *DT;
  LoopInfo *LI;
  ScalarEvolution *SE;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  const DataLayout *DL;
  OptimizationRemarkEmitter &ORE;
  bool ApplyCodeSizeHeuristics;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

public:
  LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
                     ScalarEvolution *SE, TargetLibraryInfo *TLI,
                     const TargetTransformInfo *TTI, MemorySSA *MSSA,
                     const DataLayout *DL, OptimizationRemarkEmitter &ORE)
      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool runOnLoop(Loop *L);

private:
  using StoreList = SmallVector<StoreInst *, 8>;
  using StoreListMap = MapVector<Value *, StoreList>;

  StoreListMap StoreRefsForMemset;
  StoreListMap StoreRefsForMemsetPattern;
  StoreList StoreRefsForMemcpy;
  bool HasMemset;
  bool HasMemsetPattern;
  bool HasMemcpy;

  /// Return code for isLegalStore()
  enum LegalStoreKind {
    None = 0,
    Memset,
    MemsetPattern,
    Memcpy,
    UnorderedAtomicMemcpy,
    DontUse // Dummy retval never to be used.
  };

  /// \name Countable Loop Idiom Handling
  /// @{

  bool runOnCountableLoop();
  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                      SmallVectorImpl<BasicBlock *> &ExitBlocks);

  void collectStores(BasicBlock *BB);
  LegalStoreKind isLegalStore(StoreInst *SI);
  enum class ForMemset { No, Yes };
  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
                         ForMemset For);

  template <typename MemInst>
  bool processLoopMemIntrinsic(
      BasicBlock *BB,
      bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
      const SCEV *BECount);
  bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

  bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
                               MaybeAlign StoreAlignment, Value *StoredVal,
                               Instruction *TheStore,
                               SmallPtrSetImpl<Instruction *> &Stores,
                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
                               bool IsNegStride, bool IsLoopMemset = false);
  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
  bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
                                  const SCEV *StoreSizeSCEV,
                                  MaybeAlign StoreAlign, MaybeAlign LoadAlign,
                                  Instruction *TheStore, Instruction *TheLoad,
                                  const SCEVAddRecExpr *StoreEv,
                                  const SCEVAddRecExpr *LoadEv,
                                  const SCEV *BECount);
  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
                                 bool IsLoopMemset = false);

  /// @}
  /// \name Noncountable Loop Idiom Handling
  /// @{

  bool runOnNoncountableLoop();

  bool recognizePopcount();
  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                               PHINode *CntPhi, Value *Var);
  bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
                               bool ZeroCheck, size_t CanonicalSize);
  bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
                             Instruction *DefX, PHINode *CntPhi,
                             Instruction *CntInst);
  bool recognizeAndInsertFFS(); // Find First Set: ctlz or cttz.
  bool recognizeShiftUntilLessThan();
  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *Preheader,
                                Instruction *CntInst, PHINode *CntPhi,
                                Value *InitX, Instruction *DefX,
                                const DebugLoc &DL, bool ZeroCheck,
                                bool IsCntPhiUsedOutsideLoop,
                                bool InsertSub = false);

  bool recognizeShiftUntilBitTest();
  bool recognizeShiftUntilZero();

  /// @}
};
} // end anonymous namespace
253

PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {
  if (DisableLIRP::All)
    return PreservedAnalyses::all();

  const auto *DL = &L.getHeader()->getDataLayout();

  // ORE is constructed on the fly for the function containing this loop.
  OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
                         AR.MSSA, DL, ORE);
  if (!LIR.runOnLoop(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

static void deleteDeadInstruction(Instruction *I) {
  I->replaceAllUsesWith(PoisonValue::get(I->getType()));
  I->eraseFromParent();
}
282
283
284
285
286
287
288
bool LoopIdiomRecognize::runOnLoop(Loop *L) {
  CurLoop = L;
  // If the loop could not be converted to canonical form, it must have an
  // indirectbr in it, just give up.
  if (!L->getLoopPreheader())
    return false;

  // Disable loop idiom recognition if the function's name is a common idiom.
  StringRef Name = L->getHeader()->getParent()->getName();
  if (Name == "memset" || Name == "memcpy")
    return false;

  // Determine if code size heuristics need to be applied.
  ApplyCodeSizeHeuristics =
      L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;

  HasMemset = TLI->has(LibFunc_memset);
  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
  HasMemcpy = TLI->has(LibFunc_memcpy);

  if (HasMemset || HasMemsetPattern || HasMemcpy)
    if (SE->hasLoopInvariantBackedgeTakenCount(L))
      return runOnCountableLoop();

  return runOnNoncountableLoop();
}
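// Note (illustrative): the function-name check above prevents recursive
// self-miscompilation: if this pass ran on a libc implementation of memset()
// whose inner loop is itself a byte-store loop, it would otherwise rewrite
// that loop into a call to memset() again.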
315
bool LoopIdiomRecognize::runOnCountableLoop() {
  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
  assert(!isa<SCEVCouldNotCompute>(BECount) &&
         "runOnCountableLoop() called on a loop without a predictable"
         "backedge-taken count");

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getAPInt() == 0)
      return false;
330
334 << "\n");
335
336
337
341 return false;
342
  bool MadeChange = false;

  // Scan all the blocks in the loop that are not in subloops.
  for (auto *BB : CurLoop->getBlocks()) {
    // Ignore blocks in subloops.
    if (LI->getLoopFor(BB) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
  }
  return MadeChange;
}

static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
  return ConstStride->getAPInt();
}
360
361
362
363
364
365
366
368
369
370
371
372
373
  Constant *C = dyn_cast<Constant>(V);
  if (!C || isa<ConstantExpr>(C))
    return nullptr;
377
378
381 return nullptr;
382
383
384 if (DL->isBigEndian())
385 return nullptr;
386
387
389
390
391
392 if (Size > 16)
393 return nullptr;
394
395
396 if (Size == 16)
397 return C;
398
399
400 unsigned ArraySize = 16 / Size;
403}
404
405LoopIdiomRecognize::LegalStoreKind
406LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
407
  // Don't touch volatile stores.
  if (SI->isVolatile())
    return LegalStoreKind::None;
  // We only want simple or unordered-atomic stores.
  if (!SI->isUnordered())
    return LegalStoreKind::None;
413
414
415 if (SI->getMetadata(LLVMContext::MD_nontemporal))
416 return LegalStoreKind::None;
417
418 Value *StoredVal = SI->getValueOperand();
419 Value *StorePtr = SI->getPointerOperand();
420
421
422
424 return LegalStoreKind::None;
425
426
427
428
429 TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
432 return LegalStoreKind::None;
433
434
435
436
  const SCEVAddRecExpr *StoreEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return LegalStoreKind::None;

  // Check to see if we have a constant stride.
  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
    return LegalStoreKind::None;
445
446
447
448
449
450
451
  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal, *DL);

  // Note: memset and memset_pattern on unordered-atomic is yet not supported
  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();

  // If we're allowed to form a memset, and the stored value would be
  // acceptable for memset, use it.
  if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
      // Verify that the stored value is loop invariant.  If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // It looks like we can use SplatValue.
    return LegalStoreKind::Memset;
  }
467
470
471 return LegalStoreKind::MemsetPattern;
472 }
473

  // Otherwise, see if the store can be turned into a memcpy.
  if (HasMemcpy && !DisableLIRP::Memcpy) {
    // Check to see if the stride matches the size of the store.  If so, then
    // we know that every byte is touched in the loop.
    APInt Stride = getStoreStride(StoreEv);
    unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
    if (StoreSize != Stride && StoreSize != -Stride)
      return LegalStoreKind::None;

    // The store must be feeding a non-volatile load.
    LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());

    // Only allow non-volatile loads.
    if (!LI || LI->isVolatile())
      return LegalStoreKind::None;
    // Only allow simple or unordered-atomic loads.
    if (!LI->isUnordered())
      return LegalStoreKind::None;

    // See if the load's pointer expression is an AddRec on the current loop,
    // which indicates a strided load.  If we have something else, it's a
    // random load we can't handle.
    const SCEVAddRecExpr *LoadEv =
        dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
    if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
      return LegalStoreKind::None;

    // The store and load must share the same stride.
    if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
      return LegalStoreKind::None;

    // Success.  This store can be converted into a memcpy.
    UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
    return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
                           : LegalStoreKind::Memcpy;
  }
  // This store can't be transformed into a memset/memcpy.
  return LegalStoreKind::None;
}
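// Classification examples (illustrative, assuming a unit-strided i32 loop):
//
//   p[i] = 0;          // byte-splattable value      -> LegalStoreKind::Memset
//   p[i] = 0x01020304; // 16-byte repeatable pattern -> MemsetPattern
//                      //   (only when memset_pattern16 is available)
//   p[i] = q[i];       // strided load feeding store -> Memcpy (or the
//                      //   UnorderedAtomicMemcpy flavour for atomic accesses)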
513
514void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
515 StoreRefsForMemset.clear();
516 StoreRefsForMemsetPattern.clear();
  StoreRefsForMemcpy.clear();
  for (Instruction &I : *BB) {
    StoreInst *SI = dyn_cast<StoreInst>(&I);
    if (!SI)
      continue;
522
523
524 switch (isLegalStore(SI)) {
525 case LegalStoreKind::None:
526
527 break;
    case LegalStoreKind::Memset: {
      // Find the base pointer.
      Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
      StoreRefsForMemset[Ptr].push_back(SI);
    } break;
    case LegalStoreKind::MemsetPattern: {
      // Find the base pointer.
      Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
      StoreRefsForMemsetPattern[Ptr].push_back(SI);
    } break;
538 case LegalStoreKind::Memcpy:
539 case LegalStoreKind::UnorderedAtomicMemcpy:
540 StoreRefsForMemcpy.push_back(SI);
541 break;
542 default:
543 assert(false && "unhandled return value");
544 break;
545 }
546 }
547}
548
549
550
551
552bool LoopIdiomRecognize::runOnLoopBlock(
555
556
557
558 for (BasicBlock *ExitBlock : ExitBlocks)
559 if (!DT->dominates(BB, ExitBlock))
560 return false;
561
562 bool MadeChange = false;
563
564 collectStores(BB);
565
566
567
568
569 for (auto &SL : StoreRefsForMemset)
570 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
571
572 for (auto &SL : StoreRefsForMemsetPattern)
573 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
574
575
576 for (auto &SI : StoreRefsForMemcpy)
577 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
578
579 MadeChange |= processLoopMemIntrinsic(
580 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
581 MadeChange |= processLoopMemIntrinsic(
582 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
583
584 return MadeChange;
585}
586

/// processLoopStores - See if this store(s) can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
                                           const SCEV *BECount, ForMemset For) {
590
593
594
595
597 for (unsigned i = 0, e = SL.size(); i < e; ++i) {
598 assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
599
600 Value *FirstStoredVal = SL[i]->getValueOperand();
    Value *FirstStorePtr = SL[i]->getPointerOperand();
    const SCEVAddRecExpr *FirstStoreEv =
        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
    APInt FirstStride = getStoreStride(FirstStoreEv);
    unsigned FirstStoreSize =
        DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());

    // See if we can optimize just this store in isolation.
    if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
      Heads.insert(SL[i]);
      continue;
    }
612
613 Value *FirstSplatValue = nullptr;
614 Constant *FirstPatternValue = nullptr;
615
    if (For == ForMemset::Yes)
      FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
    else
      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
621 assert((FirstSplatValue || FirstPatternValue) &&
622 "Expected either splat value or pattern value.");
623
624 IndexQueue.clear();
625
626
627
628
    unsigned j = 0;
    for (j = i + 1; j < e; ++j)
      IndexQueue.push_back(j);
    for (j = i; j > 0; --j)
      IndexQueue.push_back(j - 1);

635 for (auto &k : IndexQueue) {
636 assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
      Value *SecondStorePtr = SL[k]->getPointerOperand();
      const SCEVAddRecExpr *SecondStoreEv =
          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
      APInt SecondStride = getStoreStride(SecondStoreEv);

      if (FirstStride != SecondStride)
643 continue;
644
645 Value *SecondStoredVal = SL[k]->getValueOperand();
646 Value *SecondSplatValue = nullptr;
647 Constant *SecondPatternValue = nullptr;
648
      if (For == ForMemset::Yes)
        SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
      else
        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
654 assert((SecondSplatValue || SecondPatternValue) &&
655 "Expected either splat value or pattern value.");
656
658 if (For == ForMemset::Yes) {
659 if (isa(FirstSplatValue))
660 FirstSplatValue = SecondSplatValue;
661 if (FirstSplatValue != SecondSplatValue)
662 continue;
663 } else {
664 if (isa(FirstPatternValue))
665 FirstPatternValue = SecondPatternValue;
666 if (FirstPatternValue != SecondPatternValue)
667 continue;
668 }
671 ConsecutiveChain[SL[i]] = SL[k];
672 break;
673 }
674 }
675 }
676
677
678
680 bool Changed = false;
681
682
685 continue;
686
687
688
691 unsigned StoreSize = 0;
692
693
694 while (Tails.count(I) || Heads.count(I)) {
695 if (TransformedStores.count(I))
696 break;
698
699 StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
700
702 }
703
708
709
710
711 if (StoreSize != Stride && StoreSize != -Stride)
712 continue;
713
714 bool IsNegStride = StoreSize == -Stride;
715
716 Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
717 const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
718 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
720 HeadStore, AdjacentStores, StoreEv, BECount,
721 IsNegStride)) {
722 TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
723 Changed = true;
724 }
725 }
726
727 return Changed;
728}
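// Merging example (illustrative): two i32 stores with stride 8 such as
//
//   p[2 * i] = 0;
//   p[2 * i + 1] = 0;
//
// individually do not cover their stride, but as a consecutive chain they
// write 8 bytes per iteration, so the chain is forwarded to
// processLoopStridedStore() as a single candidate.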
729
730
731
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
    BasicBlock *BB,
    bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
    const SCEV *BECount) {
  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;
    // Look for memory instructions, which may be optimized to a larger one.
    if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
      WeakTrackingVH InstPtr(&*I);
      if (!(this->*Processor)(MI, BECount))
        continue;
      MadeChange = true;

      // If processing the instruction invalidated our iterator, start over
      // from the top of the block.
      if (!InstPtr)
        I = BB->begin();
    }
  }
  return MadeChange;
}
755
756
757bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
758 const SCEV *BECount) {
759
761 return false;
762
763
765 return false;
766
769 if (!Dest || !Source)
770 return false;
771
772
773
774
776 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
777 return false;
779 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
780 return false;
781
782
783 uint64_t SizeInBytes = cast(MCI->getLength())->getZExtValue();
784 if ((SizeInBytes >> 32) != 0)
785 return false;
786
787

  // We can only handle constant strides, and they must match for the load and
  // the store.
  const SCEVConstant *ConstStoreStride =
      dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
  const SCEVConstant *ConstLoadStride =
      dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
  if (!ConstStoreStride || !ConstLoadStride)
794 return false;
795
796 APInt StoreStrideValue = ConstStoreStride->getAPInt();
797 APInt LoadStrideValue = ConstLoadStride->getAPInt();
798
800 return false;
801
802 if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
803 ORE.emit([&]() {
805 << ore::NV("Inst", "memcpy") << " in "
807 << " function will not be hoisted: "
808 << ore::NV("Reason", "memcpy size is not equal to stride");
809 });
810 return false;
811 }
812
813 int64_t StoreStrideInt = StoreStrideValue.getSExtValue();
814 int64_t LoadStrideInt = LoadStrideValue.getSExtValue();
815
816 if (StoreStrideInt != LoadStrideInt)
817 return false;
818
819 return processLoopStoreOfLoopLoad(
822 BECount);
823}
824
825
826bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
827 const SCEV *BECount) {
828
830 return false;
831
832
834 return false;
835
  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec on the current loop, which
  // indicates a strided store.  If we have something else, it's a random
  // store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (!Ev || Ev->getLoop() != CurLoop)
    return false;
  if (!Ev->isAffine()) {
    LLVM_DEBUG(dbgs() << "  Pointer is not affine, abort\n");
    return false;
  }

  const SCEV *PointerStrideSCEV = Ev->getOperand(1);
  const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
  if (!PointerStrideSCEV || !MemsetSizeSCEV)
852 return false;
853
854 bool IsNegStride = false;
855 const bool IsConstantSize = isa(MSI->getLength());
856
857 if (IsConstantSize) {
858
859
860
    uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
    if (!ConstStride)
      return false;

    APInt Stride = ConstStride->getAPInt();
    if (SizeInBytes != Stride && SizeInBytes != -Stride)
869 return false;
870
871 IsNegStride = SizeInBytes == -Stride;
872 } else {
873
874
875
876
877
878 LLVM_DEBUG(dbgs() << " memset size is non-constant\n");
879 if (Pointer->getType()->getPointerAddressSpace() != 0) {
880 LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "
881 << "abort\n");
882 return false;
883 }
885 LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "
886 << "abort\n");
887 return false;
888 }
889
890
892 const SCEV *PositiveStrideSCEV =
894 : PointerStrideSCEV;
895 LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
896 << " PositiveStrideSCEV: " << *PositiveStrideSCEV
897 << "\n");
898
899 if (PositiveStrideSCEV != MemsetSizeSCEV) {
900
901
902 const SCEV *FoldedPositiveStride =
904 const SCEV *FoldedMemsetSize =
906
907 LLVM_DEBUG(dbgs() << " Try to fold SCEV based on loop guard\n"
908 << " FoldedMemsetSize: " << *FoldedMemsetSize << "\n"
909 << " FoldedPositiveStride: " << *FoldedPositiveStride
910 << "\n");
911
912 if (FoldedPositiveStride != FoldedMemsetSize) {
914 return false;
915 }
916 }
917 }
918

  // Verify that the memset value is loop invariant.  If not, we can't promote
  // the memset.
  Value *SplatValue = isBytewiseValue(MSI->getValue(), *DL);
  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
    return false;

  SmallPtrSet<Instruction *, 1> MSIs;
  MSIs.insert(MSI);
  return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
928 MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev,
929 BECount, IsNegStride, true);
930}
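// Widening example (illustrative): a loop that already contains a memset,
//
//   for (unsigned i = 0; i != n; ++i)
//     memset(p + i * s, 0, s);
//
// is folded into one memset(p, 0, n * s) provided the memset size equals the
// pointer stride (checked directly for constant sizes, or via SCEV and loop
// guards for the non-constant case handled above).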
931
932
933
934
935static bool
937 const SCEV *BECount, const SCEV *StoreSizeSCEV,
940
941
942
944
945
946
  const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
  const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
949 if (BECst && ConstSize) {
952
953 if (BEInt && SizeInt)
955 }
956
957
958
959
960
962
965 if (!IgnoredInsts.contains(&I) &&
967 return true;
968 return false;
969}
970
971
972
973
975 Type *IntPtr, const SCEV *StoreSizeSCEV,
978 if (!StoreSizeSCEV->isOne()) {
979
983 }
984
986}
987
988
989
990
991
993 const SCEV *StoreSizeSCEV, Loop *CurLoop,
995 const SCEV *TripCountSCEV =
1000}
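// Size computation (illustrative): with a backedge-taken count BECount, the
// loop executes BECount + 1 iterations, so the helpers above expand to
// roughly NumBytes = (BECount + 1) * StoreSize, evaluated as a SCEV in the
// preheader.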
1001
1002
1003
bool LoopIdiomRecognize::processLoopStridedStore(
    Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
    Value *StoredVal, Instruction *TheStore,
    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
    const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
  Module *M = TheStore->getModule();
  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
  Constant *PatternValue = nullptr;

  if (!SplatValue)
    PatternValue = getMemSetPatternValue(StoredVal, DL);
1016 assert((SplatValue || PatternValue) &&
1017 "Expected either splat value or pattern value.");
1018

  // The trip count of the loop and the base pointer of the AddRec SCEV are
  // loop invariant, so they dominate the loop header and we can insert code
  // for them in the preheader.
  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, *DL, "loop-idiom");
  SCEVExpanderCleaner ExpCleaner(Expander);

  Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
  Type *IntIdxTy = DL->getIndexType(DestPtr->getType());

  bool Changed = false;
  const SCEV *Start = Ev->getStart();
  // Handle negative strided loops.
  if (IsNegStride)
    Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
1037
1038
1039 if (!Expander.isSafeToExpand(Start))
1040 return Changed;
1041
1042
1043
1044
1045
  Value *BasePtr =
      Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
1049
1050
1051
1052
1053
1054
1055
1056
1057 Changed = true;

  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                            StoreSizeSCEV, *AA, Stores))
    return Changed;
1062
1063 if (avoidLIRForMultiBlockLoop(true, IsLoopMemset))
1064 return Changed;
1065
1066
1067
1068 const SCEV *NumBytesS =
1069 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1070
1071
1072
1073 if (!Expander.isSafeToExpand(NumBytesS))
1074 return Changed;
1075
1076 Value *NumBytes =
1077 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1078
  if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))
1080 return Changed;

  AAMDNodes AATags = TheStore->getAAMetadata();
  for (Instruction *Store : Stores)
    AATags = AATags.merge(Store->getAAMetadata());
  if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1086 AATags = AATags.extendTo(CI->getZExtValue());
1087 else
1088 AATags = AATags.extendTo(-1);
1089
1091 if (SplatValue) {
1092 NewCall = Builder.CreateMemSet(
1093 BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
1094 false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
1095 } else {
1097
1098 Type *Int8PtrTy = DestInt8PtrTy;
1099
1100 StringRef FuncName = "memset_pattern16";
1102 Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
1104
1105
1106
1109 PatternValue, ".memset_pattern");
1112 Value *PatternPtr = GV;
1113 NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
1114
1115
1116 if (AATags.TBAA)
1117 NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA);
1118
1119 if (AATags.Scope)
1120 NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope);
    if (AATags.NoAlias)
      NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias);
  }

  NewCall->setDebugLoc(TheStore->getDebugLoc());

  if (MSSAU) {
    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
        NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
    MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), /*RenameUses=*/true);
  }
1133
1134 LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
1135 << " from store to: " << *Ev << " at: " << *TheStore
1136 << "\n");
1137
1138 ORE.emit([&]() {
1141 R << "Transformed loop-strided store in "
1143 << " function into a call to "
1145 << "() intrinsic";
1146 if (!Stores.empty())
1148 for (auto *I : Stores) {
1149 R << ore::NV("FromBlock", I->getParent()->getName())
1151 }
1152 return R;
1153 });
1154
1155
1156
  for (auto *I : Stores) {
    if (MSSAU)
      MSSAU->removeMemoryAccess(I, /*OptimizePhis=*/true);
    deleteDeadInstruction(I);
  }
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
1164 ++NumMemSet;
1165 ExpCleaner.markResultUsed();
1166 return true;
1167}
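// Result sketch (illustrative IR, simplified): a byte-store loop keeps its
// structure, but a call such as
//   call void @llvm.memset.p0.i64(ptr %basptr, i8 0, i64 %numbytes, i1 false)
// is emitted in the preheader; the now-dead stores are deleted above and
// later passes remove the empty loop.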
1168
1169
1170
1171
1172bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
1173 const SCEV *BECount) {
1174 assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
1175
  Value *StorePtr = SI->getPointerOperand();
  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());

  // The store must be feeding a non-volatile load.
  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
1183
1184
1185
1186
1189
1191 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1192 SI->getAlign(), LI->getAlign(), SI, LI,
1193 StoreEv, LoadEv, BECount);
1194}
1195
1196namespace {
1197class MemmoveVerifier {
1198public:
  explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
                           const DataLayout &DL)
      : DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset(
                    LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
        BP2(llvm::GetPointerBaseWithConstantOffset(
            StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
        IsSameObject(BP1 == BP2) {}

  bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
                                  const Instruction &TheLoad,
                                  bool IsMemCpy) const {
1210 if (IsMemCpy) {
1211
1212
1213 if ((!IsNegStride && LoadOff <= StoreOff) ||
1214 (IsNegStride && LoadOff >= StoreOff))
1215 return false;
1216 } else {
1217
1218
1219 int64_t LoadSize =
1220 DL.getTypeSizeInBits(TheLoad.getType()).getFixedValue() / 8;
1221 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
1222 return false;
1223 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
1224 (IsNegStride && LoadOff + LoadSize > StoreOff))
1225 return false;
1226 }
1227 return true;
1228 }
1229
private:
  const DataLayout &DL;
  int64_t LoadOff = 0;
1233 int64_t StoreOff = 0;
1234 const Value *BP1;
1235 const Value *BP2;
1236
1237public:
1238 const bool IsSameObject;
1239};
1240}
1241
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
    Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
    MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
    Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
    const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
1248
1249
1250
  if (isa<MemCpyInlineInst>(TheStore))
1252 return false;
1253

  // The trip count of the loop and the base pointers of the AddRec SCEVs are
  // loop invariant, so they dominate the loop header and we can insert code
  // for them in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, *DL, "loop-idiom");
  SCEVExpanderCleaner ExpCleaner(Expander);

  bool Changed = false;
  const SCEV *StrStart = StoreEv->getStart();
  unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
  Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));

  APInt Stride = getStoreStride(StoreEv);
  const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);

  // TODO: Deal with non-constant sizes; currently a constant store size is
  // expected.
  assert(ConstStoreSize && "store size is expected to be a constant");

  int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
  bool IsNegStride = StoreSize == -Stride;
1276
1277
1278 if (IsNegStride)
1279 StrStart =
1281
1282
1283
1284
1285
1286
1287
1288 Value *StoreBasePtr = Expander.expandCodeFor(
1289 StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator());
1290
1291
1292
1293
1294
1295
1296
1297
1298 Changed = true;
1299
1301 IgnoredInsts.insert(TheStore);
1302
1303 bool IsMemCpy = isa(TheStore);
1304 const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
1305
  bool LoopAccessStore =
      mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                            StoreSizeSCEV, *AA, IgnoredInsts);
1309 if (LoopAccessStore) {
1310
1311
1312
1314 return Changed;
1315 IgnoredInsts.insert(TheLoad);
1317 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1318 ORE.emit([&]() {
1320 TheStore)
1321 << ore::NV("Inst", InstRemark) << " in "
1323 << " function will not be hoisted: "
1324 << ore::NV("Reason", "The loop may access store location");
1325 });
1326 return Changed;
1327 }
1328 IgnoredInsts.erase(TheLoad);
1329 }

  const SCEV *LdStart = LoadEv->getStart();
  unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();

  // Handle negative strided loops.
  if (IsNegStride)
    LdStart =
        getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);

1339
1340
  Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
                                              Preheader->getTerminator());
1344
1345
1346 MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
  if (IsMemCpy && !Verifier.IsSameObject)
    IgnoredInsts.erase(TheStore);
  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                            StoreSizeSCEV, *AA, IgnoredInsts)) {
1351 ORE.emit([&]() {
1353 << ore::NV("Inst", InstRemark) << " in "
1355 << " function will not be hoisted: "
1356 << ore::NV("Reason", "The loop may access load location");
1357 });
1358 return Changed;
1359 }
1360
1361 bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
1362 if (UseMemMove)
    if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
                                             IsMemCpy))
1365 return Changed;
1366
1367 if (avoidLIRForMultiBlockLoop())
1368 return Changed;
1369
1370
1371
1372 const SCEV *NumBytesS =
1373 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1374
1375 Value *NumBytes =
1376 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());

  AAMDNodes AATags = TheLoad->getAAMetadata();
  AAMDNodes StoreAATags = TheStore->getAAMetadata();
  AATags = AATags.merge(StoreAATags);
  if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1382 AATags = AATags.extendTo(CI->getZExtValue());
1383 else
1384 AATags = AATags.extendTo(-1);
1385
1386 CallInst *NewCall = nullptr;
1387
1388
1389
1391 if (UseMemMove)
1392 NewCall = Builder.CreateMemMove(
1393 StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1394 false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
1395 else
1396 NewCall =
1397 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1398 NumBytes, false, AATags.TBAA,
1400 } else {
1401
1402 if (UseMemMove)
1403 return Changed;
1404
1405
1406 assert((StoreAlign && LoadAlign) &&
1407 "Expect unordered load/store to have align.");
1408 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
1409 return Changed;
1410
1411
1412
1413
1414
1416 return Changed;
1417
1418
1419
1420
1421 NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1422 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
1424 }
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  if (MSSAU) {
    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
        NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
    MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), /*RenameUses=*/true);
  }
1432
1433 LLVM_DEBUG(dbgs() << " Formed new call: " << *NewCall << "\n"
1434 << " from load ptr=" << *LoadEv << " at: " << *TheLoad
1435 << "\n"
1436 << " from store ptr=" << *StoreEv << " at: " << *TheStore
1437 << "\n");
1438
1439 ORE.emit([&]() {
1442 << "Formed a call to "
1444 << "() intrinsic from " << ore::NV("Inst", InstRemark)
1445 << " instruction in " << ore::NV("Function", TheStore->getFunction())
1446 << " function"
1450 });
1451

  // Okay, a new call to memcpy/memmove has been formed.  Zap the original
  // store and anything that feeds into it.
  if (MSSAU)
    MSSAU->removeMemoryAccess(TheStore, /*OptimizePhis=*/true);
  deleteDeadInstruction(TheStore);
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
1459 if (UseMemMove)
1460 ++NumMemMove;
1461 else
1462 ++NumMemCpy;
1463 ExpCleaner.markResultUsed();
1464 return true;
1465}
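// memcpy vs. memmove (illustrative): when the source and destination may be
// (or provably are) the same underlying object, e.g.
//
//   for (unsigned i = 0; i != n; ++i)
//     p[i] = p[i + 4];
//
// a plain memcpy would be undefined for overlapping ranges, so the code above
// emits memmove instead, and only after MemmoveVerifier has checked that the
// offsets and stride direction make the copy well defined.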
1466
1467
1468
1469
1470bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1471 bool IsLoopMemset) {
1472 if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1473 if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
1475 << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1476 << " avoided: multi-block top-level loop\n");
1477 return true;
1478 }
1479 }
1480
1481 return false;
1482}
1483
bool LoopIdiomRecognize::runOnNoncountableLoop() {
  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
                    << CurLoop->getHeader()->getParent()->getName()
                    << "] Noncountable Loop %"
                    << CurLoop->getHeader()->getName() << "\n");

1490 return recognizePopcount() || recognizeAndInsertFFS() ||
1491 recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1492 recognizeShiftUntilLessThan();
1493}
1494
1495
1496
1497
1498
1499
1500
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
                             bool JmpOnZero = false) {
  if (!BI || !BI->isConditional())
    return nullptr;

  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return nullptr;

  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
  if (!CmpZero || !CmpZero->isZero())
    return nullptr;

  BasicBlock *TrueSucc = BI->getSuccessor(0);
  BasicBlock *FalseSucc = BI->getSuccessor(1);
  if (JmpOnZero)
    std::swap(TrueSucc, FalseSucc);

  ICmpInst::Predicate Pred = Cond->getPredicate();
  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
      (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
    return Cond->getOperand(0);

  return nullptr;
}
1526
1527
1528
1529
1530
static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
                                     APInt &Threshold) {
  if (!BI || !BI->isConditional())
    return nullptr;

  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return nullptr;

  ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
  if (!CmpConst)
    return nullptr;

  BasicBlock *FalseSucc = BI->getSuccessor(1);
  ICmpInst::Predicate Pred = Cond->getPredicate();

  if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
    Threshold = CmpConst->getValue();
    return Cond->getOperand(0);
  }

  return nullptr;
}
1554
1555
1556
  auto *PhiX = dyn_cast<PHINode>(VarX);
1560 if (PhiX && PhiX->getParent() == LoopEntry &&
1561 (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1562 return PhiX;
1563 return nullptr;
1564}
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1597 APInt &Threshold) {
1599
1600 DefX = nullptr;
1601 CntInst = nullptr;
1602 CntPhi = nullptr;
  BasicBlock *LoopEntry = *(CurLoop->block_begin());

  // Check the loop backedge condition: an unsigned less-than compare against
  // a threshold.
  if (Value *T = matchShiftULTCondition(
          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
          Threshold))
    DefX = dyn_cast<Instruction>(T);
1610 else
1611 return false;
1612
1613
  if (!DefX || !isa<PHINode>(DefX))
    return false;

  PHINode *VarPhi = cast<PHINode>(DefX);
  int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
  if (Idx == -1)
1620 return false;
1621
1624 return false;
1625
1626
1627 if (DefX->getOpcode() != Instruction::LShr)
1628 return false;
1629
  IntrinID = Intrinsic::ctlz;
  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
  if (!Shft || !Shft->isOne())
1633 return false;
1634
1636
1637
1638
1639
1640
1641
1642
1643
    if (Inst.getOpcode() != Instruction::Add)
      continue;

    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
    if (!Inc || !Inc->isOne())
      continue;

    PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
    if (!Phi)
      continue;
1656
1657 CntInst = &Inst;
1658 CntPhi = Phi;
1659 break;
1660 }
1661 if (!CntInst)
1662 return false;
1663
1664 return true;
1665}
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1694
1695
1698 Value *VarX1, *VarX0;
1699 PHINode *PhiX, *CountPhi;
1700
1701 DefX2 = CountInst = nullptr;
1702 VarX1 = VarX0 = nullptr;
1703 PhiX = CountPhi = nullptr;
1705
1706
  {
    if (Value *T = matchCondition(
            dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
      DefX2 = dyn_cast<Instruction>(T);
1711 else
1712 return false;
1713 }
1714
1715
1716 {
1717 if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1718 return false;
1719

    Instruction *SubOneOp;

    if ((SubOneOp = dyn_cast<Instruction>(DefX2->getOperand(0))))
      VarX1 = DefX2->getOperand(1);
    else {
      VarX1 = DefX2->getOperand(0);
      SubOneOp = dyn_cast<Instruction>(DefX2->getOperand(1));
    }
1728 if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
1729 return false;

    ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
    if (!Dec ||
        !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
          (SubOneOp->getOpcode() == Instruction::Add &&
           Dec->isMinusOne()))) {
      return false;
1737 }
1738 }
1739
  // step 2: Check the recurrence of variable X.
  PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
  if (!PhiX)
1743 return false;
1744
1745
1746 {
1747 CountInst = nullptr;
1750 if (Inst.getOpcode() != Instruction::Add)
1751 continue;
1752
1754 if (!Inc || !Inc->isOne())
1755 continue;
1756
1758 if (!Phi)
1759 continue;
1760
1761
      bool LiveOutLoop = false;
      for (User *U : Inst.users()) {
        if ((cast<Instruction>(U))->getParent() != LoopEntry) {
          LiveOutLoop = true;
          break;
        }
      }
1769
1770 if (LiveOutLoop) {
1771 CountInst = &Inst;
1772 CountPhi = Phi;
1773 break;
1774 }
1775 }
1776
1777 if (!CountInst)
1778 return false;
1779 }
1780
1781
1782
1783 {
    auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
    Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
    if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
      return false;
1788
1789 CntInst = CountInst;
1790 CntPhi = CountPhi;
1791 Var = T;
1792 }
1793
1794 return true;
1795}
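// Recognized shape (illustrative C):
//
//   if (x)                       // precondition tested in PreCondBB
//     do {
//       cnt++;
//       x &= x - 1;              // clears the lowest set bit
//     } while (x);
//
// i.e. the classic "Kernighan" population-count loop; CntInst/CntPhi/Var are
// bound to cnt++, the cnt phi, and x respectively.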
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1829 Value *VarX = nullptr;
1830
1831 DefX = nullptr;
1832 CntInst = nullptr;
1833 CntPhi = nullptr;
  BasicBlock *LoopEntry = *(CurLoop->block_begin());

  // step 1: Check if the loop-back branch is in desirable form.
  if (Value *T = matchCondition(
          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
    DefX = dyn_cast<Instruction>(T);
1840 else
1841 return false;
1842
1843
1844 if (!DefX || !DefX->isShift())
1845 return false;
1846 IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
                                                    Intrinsic::ctlz;
  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
  if (!Shft || !Shft->isOne())
1850 return false;
  VarX = DefX->getOperand(0);

  // step 2: Check the recurrence of variable X.
  PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
  if (!PhiX)
    return false;
1857
1859
1860
1861
1863 return false;
1864
1865
1866
1867
1868
1869
1870
1871
    if (Inst.getOpcode() != Instruction::Add)
      continue;

    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
    if (!Inc || !Inc->isOne())
      continue;

    PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
    if (!Phi)
      continue;
1884
1885 CntInst = &Inst;
1886 CntPhi = Phi;
1887 break;
1888 }
1889 if (!CntInst)
1890 return false;
1891
1892 return true;
1893}
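// Recognized shape (illustrative C):
//
//   do {
//     cnt++;
//     x >>= 1;                   // or x <<= 1 for the cttz flavour
//   } while (x);
//
// The shift direction selects the intrinsic: shl -> cttz, lshr/ashr -> ctlz.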
1894
1895
1896
1897bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
1898 Value *InitX, bool ZeroCheck,
1899 size_t CanonicalSize) {
1902
1903
1906 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1907
1912 return false;
1913
1914 return true;
1915}
1916
1917
1918
1919
1920bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
  bool IsCntPhiUsedOutsideLoop = false;
  for (User *U : CntPhi->users())
    if (!CurLoop->contains(cast<Instruction>(U))) {
1927 IsCntPhiUsedOutsideLoop = true;
1928 break;
1929 }
1930 bool IsCntInstUsedOutsideLoop = false;
1931 for (User *U : CntInst->users())
    if (!CurLoop->contains(cast<Instruction>(U))) {
1933 IsCntInstUsedOutsideLoop = true;
1934 break;
1935 }
1936
1937
1938 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1939 return false;
1940
1941
1942
1943
1944 bool ZeroCheck = false;
1945
1946
1948
1949
1950
1951
1952
  if (!IsCntPhiUsedOutsideLoop) {
    BasicBlock *PreCondBB = PH->getSinglePredecessor();
    if (!PreCondBB)
      return false;
    auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
    if (!PreCondBI)
      return false;
    if (matchCondition(PreCondBI, PH) != InitX)
      return false;
    ZeroCheck = true;
  }
1964
1965
1966
1967
1968
1969
1970
1971
1972 size_t IdiomCanonicalSize = 6;
1973 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1974 return false;
1975
1976 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978 IsCntPhiUsedOutsideLoop);
1979 return true;
1980}
1981
1982
1983
1984
bool LoopIdiomRecognize::recognizeAndInsertFFS() {
  // Give up if the loop has multiple blocks or multiple backedges.
  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
    return false;

  Intrinsic::ID IntrinID;
  Value *InitX;
  Instruction *DefX = nullptr;
  PHINode *CntPhi = nullptr;
  Instruction *CntInst = nullptr;

  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
                                 DefX))
    return false;

  return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
}
2002
2003bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
  // Give up if the loop has multiple blocks or multiple backedges.
  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
    return false;

  Intrinsic::ID IntrinID;
  Value *InitX;
  Instruction *DefX = nullptr;
  PHINode *CntPhi = nullptr;
  Instruction *CntInst = nullptr;

  APInt LoopThreshold;
  if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
                                     CntPhi, DefX, LoopThreshold))
2017 return false;
2018
2019 if (LoopThreshold == 2) {
2020
2021 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2022 }
2023
2024
2025 if (LoopThreshold != 4)
2026 return false;
2027
2028
2030 if (!CurLoop->contains(cast(U)))
2031 return false;
2032
2033
2034
2037 if (!PreCondBB)
2038 return false;
  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2040 if (!PreCondBI)
2041 return false;
2042
  APInt PreLoopThreshold;
  if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
      PreLoopThreshold != 2)
2046 return false;
2047
2048 bool ZeroCheck = true;
2049
2050
2051
2052
2053
2054
2055
2056
2057 size_t IdiomCanonicalSize = 6;
2058 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2059 return false;
2060
2061
2062 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2064 false,
2065 true);
2066 return true;
2067}
2068
2069
2070
2071
2072
2073bool LoopIdiomRecognize::recognizePopcount() {
2075 return false;
2076
2077
2078
2079
2080
2081
  // Give up if the loop has multiple blocks or multiple backedges.
  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
    return false;

  BasicBlock *LoopBody = *(CurLoop->block_begin());
  if (LoopBody->size() >= 20) {
2088
2089 return false;
2090 }
2091
  // It should have a preheader containing nothing but an unconditional branch.
  BasicBlock *PH = CurLoop->getLoopPreheader();
  if (!PH || &PH->front() != PH->getTerminator())
    return false;
  auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
2097 if (!EntryBI || EntryBI->isConditional())
2098 return false;
2099
2100
2101
2103 if (!PreCondBB)
2104 return false;
  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
  if (!PreCondBI || PreCondBI->isUnconditional())
    return false;

  Instruction *CntInst;
  PHINode *CntPhi;
  Value *Val;
  if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
    return false;
2114
2115 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
2116 return true;
2117}
2118
static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
                                       const DebugLoc &DL) {
  Value *Ops[] = {Val};
  Type *Tys[] = {Val->getType()};
  CallInst *CI = IRBuilder.CreateIntrinsic(Intrinsic::ctpop, Tys, Ops);
  CI->setDebugLoc(DL);

  return CI;
}

static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
                                    const DebugLoc &DL, bool ZeroCheck,
                                    Intrinsic::ID IID) {
  Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
  Type *Tys[] = {Val->getType()};
  CallInst *CI = IRBuilder.CreateIntrinsic(IID, Tys, Ops);
  CI->setDebugLoc(DL);

  return CI;
}
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
void LoopIdiomRecognize::transformLoopToCountable(
    Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
    PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
  BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());

  // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader.
  IRBuilder<> Builder(PreheaderBr);
  Builder.SetCurrentDebugLocation(DL);
2182
2183
2184
2185
2186
2187
2188
2189 Value *InitXNext;
2190 if (IsCntPhiUsedOutsideLoop) {
2191 if (DefX->getOpcode() == Instruction::AShr)
2192 InitXNext = Builder.CreateAShr(InitX, 1);
2193 else if (DefX->getOpcode() == Instruction::LShr)
2194 InitXNext = Builder.CreateLShr(InitX, 1);
2195 else if (DefX->getOpcode() == Instruction::Shl)
2196 InitXNext = Builder.CreateShl(InitX, 1);
    else
      llvm_unreachable("unexpected opcode!");
  } else
    InitXNext = InitX;
  Value *Count =
      createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
  Type *CountTy = Count->getType();
  Count = Builder.CreateSub(
      ConstantInt::get(CountTy, CountTy->getScalarSizeInBits()), Count);
  if (InsertSub)
2207 Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
2208 Value *NewCount = Count;
2209 if (IsCntPhiUsedOutsideLoop)
2210 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
2211
2212 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
2213
2215 if (cast(CntInst->getOperand(1))->isOne()) {
2216
2217
2218 ConstantInt *InitConst = dyn_cast(CntInitVal);
2219 if (!InitConst || !InitConst->isZero())
2220 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2221 } else {
2222
2223
2224 NewCount = Builder.CreateSub(CntInitVal, NewCount);
2225 }
2226
2227
2228
2229
2230
2231
2232
2233
  BasicBlock *Body = *(CurLoop->block_begin());
  auto *LbBr = cast<BranchInst>(Body->getTerminator());
  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());

  PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi");
  TcPhi->insertBefore(Body->begin());
2242 Builder.SetInsertPoint(LbCond);
  Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
      TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
2245
2248
2253 LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
2254

  // All the references to the original counter outside the loop are replaced
  // with the NewCount.
  if (IsCntPhiUsedOutsideLoop)
    CntPhi->replaceUsesOutsideBlock(NewCount, Body);
  else
    CntInst->replaceUsesOutsideBlock(NewCount, Body);

  // step 3: Adjust the successor basic block to recalculate the back edge if
  // necessary.
  SE->forgetLoop(CurLoop);
}
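// Rewrite sketch (illustrative): for the ctlz flavour the loop-carried count
// becomes a closed form computed in the preheader, roughly
//
//   Count    = BitWidth(x) - ctlz(x, ZeroCheck)
//   NewCount = Count + InitialCount          // adjusted as in the code above
//
// and the loop itself is given an explicit trip count (tcphi/tcdec) so that
// later passes can delete it once the counter uses are rewritten.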
2266
void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
                                                 Instruction *CntInst,
                                                 PHINode *CntPhi, Value *Var) {
  BasicBlock *PreHead = CurLoop->getLoopPreheader();
  auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
  const DebugLoc &DL = CntInst->getDebugLoc();
2274
2275
2276
2277
  // Step 1: Insert the ctpop instruction at the end of the precondition block.
  IRBuilder<> Builder(PreCondBr);
  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
  {
    PopCnt = createPopcntIntrinsic(Builder, Var, DL);
    NewCount = PopCntZext =
        Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
2285
    if (NewCount != PopCnt)
      (cast<Instruction>(NewCount))->setDebugLoc(DL);
2288
2289
2290 TripCnt = NewCount;
2291
    // NewCount is the population count plus the initial counter value.
    Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
    ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
    if (!InitConst || !InitConst->isZero()) {
      NewCount = Builder.CreateAdd(NewCount, CntInitVal);
      (cast<Instruction>(NewCount))->setDebugLoc(DL);
2298 }
2299 }
2300
2301
2302
2303
2304
2305 {
    ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());

    Value *Opnd0 = PopCntZext;
    Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
    if (PreCond->getOperand(0) != Var)
      std::swap(Opnd0, Opnd1);

    ICmpInst *NewPreCond = cast<ICmpInst>(
        Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
    PreCondBr->setCondition(NewPreCond);

    RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
  }
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2341 {
    auto *LbBr = cast<BranchInst>(Body->getTerminator());
    ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2345
2348
2349 Builder.SetInsertPoint(LbCond);
2351 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2352 "tcdec", false, true));
2353
2356
2361 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
2362 }
2363

  // Step 4: All the references to the original population counter outside the
  // loop are replaced with the NewCount.
  CntInst->replaceUsesOutsideBlock(NewCount, Body);

  // Step 5: Forget the "non-computable" trip-count SCEV associated with the
  // loop, so the loop can later be recognized as countable and deleted.
  SE->forgetLoop(CurLoop);
}
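// Rewrite sketch (illustrative): the precondition block gains
//   %pc = call i64 @llvm.ctpop.i64(i64 %x)
// the original comparison is re-pointed at %pc, and the loop is driven by a
// new countable induction variable that starts at %pc and counts down, so the
// loop body no longer computes the population count itself.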
2373

/// Match loop-invariant value.
template <typename SubPattern_t> struct match_LoopInvariant {
  SubPattern_t SubPattern;
  const Loop *L;

  match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
      : SubPattern(SP), L(L) {}

  template <typename ITy> bool match(ITy *V) {
    return L->isLoopInvariant(V) && SubPattern.match(V);
  }
};

/// Matches if the value is loop-invariant.
template <typename Ty>
inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
  return match_LoopInvariant<Ty>(M, L);
}
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
                                         Value *&BitMask, Value *&BitPos,
                                         Value *&CurrX, Instruction *&NextX) {
  LLVM_DEBUG(dbgs() << __func__
                    << " Performing shift-until-bittest idiom detection.\n");
2420
2421
2424 return false;
2425 }
2426
2429 assert(LoopPreheaderBB && "There is always a loop preheader.");
2430
2431 using namespace PatternMatch;
2432
2433
2434
2436 Value *CmpLHS, *CmpRHS;
2442 return false;
2443 }
2444
2445
2446
2447 auto MatchVariableBitMask = [&]() {
2454 CurLoop))));
2455 };
2456 auto MatchConstantBitMask = [&]() {
2461 };
2462 auto MatchDecomposableConstantBitMask = [&]() {
2464 if (Res && Res->Mask.isPowerOf2()) {
2466 Pred = Res->Pred;
2467 CurrX = Res->X;
2468 BitMask = ConstantInt::get(CurrX->getType(), Res->Mask);
2469 BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2());
2470 return true;
2471 }
2472 return false;
2473 };
2474
2475 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2476 !MatchDecomposableConstantBitMask()) {
2478 return false;
2479 }
2480
2481
  auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2483 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2485 return false;
2486 }
2487
2488 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
  NextX =
      dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2491
2493 "Expected BaseX to be available in the preheader!");
2494
2496
2498 return false;
2499 }
2500
2501
2502
2504 "Should only get equality predicates here.");
2505
2506
2510 }
2511
2512
2513
2514 if (TrueBB != LoopHeaderBB) {
2516 return false;
2517 }
2518
2519
2520 return true;
2521}
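// Recognized shape (illustrative C):
//
//   while ((x & (1u << bit)) == 0)   // bit may be a variable or a constant
//     x <<= 1;
//
// BaseX/CurrX/NextX bind the incoming value, the phi and the shifted value;
// BitMask/BitPos describe the tested bit.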
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2574 bool MadeChange = false;
2575
  Value *X, *BitMask, *BitPos, *XCurr;
  Instruction *XNext;
  if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
                                    XNext)) {
2581 " shift-until-bittest idiom detection failed.\n");
2582 return MadeChange;
2583 }
2585
2586
2587
  BasicBlock *LoopHeaderBB = CurLoop->getHeader();
  BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
  assert(LoopPreheaderBB && "There is always a loop preheader.");
  BasicBlock *SuccessorBB = CurLoop->getExitBlock();
  assert(SuccessorBB && "There is only a single successor.");
2595
2597 Builder.SetCurrentDebugLocation(cast(XCurr)->getDebugLoc());
2598
2602
2605
2606
2607
2608
2610 IntrID, Ty, {PoisonValue::get(Ty), Builder.getTrue()});
2614 " Intrinsic is too costly, not beneficial\n");
2615 return MadeChange;
2616 }
2620 return MadeChange;
2621 }
2622
2623
2624 MadeChange = true;
2625
2627
2628
2629 std::optionalBasicBlock::iterator InsertPt = std::nullopt;
2630 if (auto *BitPosI = dyn_cast(BitPos))
2631 InsertPt = BitPosI->getInsertionPointAfterDef();
2632 else
2634 if (!InsertPt)
2635 return false;
2639 return U.getUser() != BitPosFrozen;
2640 });
2641 BitPos = BitPosFrozen;
2642 }
2643

  // Step 1: Compute the loop trip count.

  Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
                                        BitPos->getName() + ".lowbitmask");
  Value *Mask =
      Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
2650 Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
2651 CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
2652 IntrID, Ty, {XMasked, Builder.getTrue()},
2653 nullptr, XMasked->getName() + ".numleadingzeros");
2654 Value *XMaskedNumActiveBits = Builder.CreateSub(
2655 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
2656 XMasked->getName() + ".numactivebits", true,
2657 Bitwidth != 2);
2658 Value *XMaskedLeadingOnePos =
2660 XMasked->getName() + ".leadingonepos", false,
2661 Bitwidth > 2);
2662
2663 Value *LoopBackedgeTakenCount = Builder.CreateSub(
2664 BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
2665 true, true);
2666
2667
2668 Value *LoopTripCount =
2669 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2670 CurLoop->getName() + ".tripcount", true,
2671 Bitwidth != 2);
2672
2673
2674
2675
2676
2677 Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
  if (auto *I = dyn_cast<Instruction>(NewX))
2680 I->copyIRFlags(XNext, true);
2681
2682 Value *NewXNext;
2683
2684
2685
2686
2692 NewXNext = Builder.CreateShl(X, LoopTripCount);
2693 else {
2694
2695
2696
2697 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
2698 }
2699
  if (auto *I = dyn_cast<Instruction>(NewXNext))
2702 I->copyIRFlags(XNext, true);
2703
2704
2705
2706
2709
2710
2711
2712
2713 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
2714 auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
2715
2716
2717
2718 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
2719 auto *IVNext =
2720 Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
2721 true, Bitwidth != 2);
2722
2723
2724 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
2725 CurLoop->getName() + ".ivcheck");
2726 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2728
2729
2730 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2731 IV->addIncoming(IVNext, LoopHeaderBB);
2732
2733
2734
2735
2737
2738
2739
2741
2742 ++NumShiftUntilBitTest;
2743 return MadeChange;
2744}
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
                                      Instruction *&ValShiftedIsZero,
                                      Intrinsic::ID &IntrinID, Instruction *&IV,
                                      Value *&Start, Value *&Val,
                                      const SCEV *&ExtraOffsetExpr,
                                      bool &InvertedCond) {
  LLVM_DEBUG(dbgs() << __func__
                    << " Performing shift-until-zero idiom detection.\n");
2782
2783
2786 return false;
2787 }
2788
2789 Instruction *ValShifted, *NBits, *IVNext;
2790 Value *ExtraOffset;
2791
2794 assert(LoopPreheaderBB && "There is always a loop preheader.");
2795
2796 using namespace PatternMatch;
2797
2798
2799
2805 (ValShiftedIsZero,
2809 return false;
2810 }
2811
2812
2813
2817 return false;
2818 }
2819 IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
2820 : Intrinsic::ctlz;
2821
2822
2823
2828 else if (match(NBits,
2832 ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
2833 else {
2834 IV = NBits;
2836 }
2837
2838
  auto *IVPN = dyn_cast<PHINode>(IV);
2840 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2842 return false;
2843 }
2844
2845 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
  IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2847
2850 return false;
2851 }
2852
2853
2854
2856 "Should only get equality predicates here.");
2857
2858
2860 if (InvertedCond) {
2863 }
2864
2865
2866
2867 if (FalseBB != LoopHeaderBB) {
2869 return false;
2870 }
2871
2872
2873
2874
2875
2876
2877
2878 if (ValShifted->getOpcode() == Instruction::AShr &&
2881 return false;
2882 }
2883
2884
2885 return true;
2886}
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2943 bool MadeChange = false;

  Instruction *ValShiftedIsZero;
  Intrinsic::ID IntrID;
  Instruction *IV;
  Value *Start, *Val;
  const SCEV *ExtraOffsetExpr;
  bool InvertedCond;
  if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
                                 Start, Val, ExtraOffsetExpr, InvertedCond)) {
2954 " shift-until-zero idiom detection failed.\n");
2955 return MadeChange;
2956 }
2958
2959
2960

  BasicBlock *LoopHeaderBB = CurLoop->getHeader();
  BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
  assert(LoopPreheaderBB && "There is always a loop preheader.");

  BasicBlock *SuccessorBB = CurLoop->getExitBlock();
  assert(SuccessorBB && "There is only a single successor.");

  IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
  Builder.SetCurrentDebugLocation(IV->getDebugLoc());
2971
2974
2977
2978
2979
2980
2982 IntrID, Ty, {PoisonValue::get(Ty), Builder.getFalse()});
2986 " Intrinsic is too costly, not beneficial\n");
2987 return MadeChange;
2988 }
2989
2990
2991 MadeChange = true;
2992
2993 bool OffsetIsZero = false;
  if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2995 OffsetIsZero = ExtraOffsetExprC->isZero();
2996
2997
2998
2999 CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
3000 IntrID, Ty, {Val, Builder.getFalse()},
3001 nullptr, Val->getName() + ".numleadingzeros");
3002 Value *ValNumActiveBits = Builder.CreateSub(
3004 Val->getName() + ".numactivebits", true,
3005 Bitwidth != 2);

  SCEVExpander Expander(*SE, *DL, "loop-idiom");
  Expander.setInsertPoint(&*Builder.GetInsertPoint());
3009 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
3010
3011 Value *ValNumActiveBitsOffset = Builder.CreateAdd(
3012 ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
3013 OffsetIsZero, true);
3014 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
3015 {ValNumActiveBitsOffset, Start},
3016 nullptr, "iv.final");
3017
3018 auto *LoopBackedgeTakenCount = cast(Builder.CreateSub(
3019 IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
3020 OffsetIsZero, true));
3021
3022
3023
3024 Value *LoopTripCount =
3025 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3026 CurLoop->getName() + ".tripcount", true,
3027 Bitwidth != 2);
3028
3029
3030
3031
3032 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
3033
3034
3035
3036
3037 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
3038 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
3039
3040
3041 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt());
3042 auto *CIVNext =
3043 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
3044 true, Bitwidth != 2);
3045
3046
3047 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
3048 CurLoop->getName() + ".ivcheck");
3049 auto *NewIVCheck = CIVCheck;
3050 if (InvertedCond) {
3051 NewIVCheck = Builder.CreateNot(CIVCheck);
3052 NewIVCheck->takeName(ValShiftedIsZero);
3053 }
3054
3055
3056 auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", false,
3057 true);
3058 IVDePHId->takeName(IV);
3059
3060
3061 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3062 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3064
3065
3066 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3067 CIV->addIncoming(CIVNext, LoopHeaderBB);
3068
3069
3070
3071
3073
3074
3075 IV->replaceAllUsesWith(IVDePHId);
3076 IV->eraseFromParent();
3077
3080
3081
3082
3084
3085 ++NumShiftUntilZero;
3086 return MadeChange;
3087}
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX, APInt &Threshold)
Return true if the idiom is detected in the loop.
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
static Value * matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry, APInt &Threshold)
Check if the given conditional branch is based on an unsigned less-than comparison between a variable...
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
static void deleteDeadInstruction(Instruction *I)
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " "with -Os/-Oz"), cl::init(true), cl::Hidden)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
std::optional< uint64_t > tryZExtValue() const
Get zero extended value if possible.
unsigned getBitWidth() const
Return the number of bits in the APInt.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_ULT
unsigned less than
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Predicate getPredicate() const
Return the predicate for this instruction.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
void setUnnamedAddr(UnnamedAddr Val)
@ PrivateLinkage
Like Internal, but omit from symbol table.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
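A minimal sketch (not taken from this file) of IRBuilder::CreateIntrinsic as declared above: materialize an llvm.ctpop call mangled on the operand's type, similar in spirit to what a popcount idiom replacement emits. The helper name and result name are illustrative.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Hypothetical helper: insert the call at the builder's current insertion point.
static Value *emitPopCount(IRBuilder<> &Builder, Value *Val) {
  return Builder.CreateIntrinsic(Intrinsic::ctpop, {Val->getType()}, {Val},
                                 /*FMFSource=*/{}, "ctpop");
}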
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
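A brief sketch of the Loop queries listed above, under an assumed helper name: the kind of shape checks an idiom-style pass makes before rewriting a loop (preheader present, a single exit block, a single back edge, and a loop-invariant candidate pointer).

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical shape check; getExitBlock() returns null when there is more
// than one exit block.
static bool hasSimpleShape(const Loop *L, const Value *Ptr) {
  return L->getLoopPreheader() && L->getExitBlock() &&
         L->getNumBackEdges() == 1 && L->isLoopInvariant(Ptr);
}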
This class implements a map that also provides access to all stored values in a deterministic order.
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
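A hedged sketch using the MemCpyInst accessors above: gather the operands a memcpy-widening transform cares about. The MemCpyParts struct and helper name are assumptions made for illustration.

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Hypothetical aggregate; getDest()/getSource() strip pointer casts from the
// raw operands, getLength() returns the length argument unchanged.
struct MemCpyParts {
  Value *Dest, *Source, *Length;
  MaybeAlign DestAlign, SourceAlign;
};

static MemCpyParts collectMemCpyParts(MemCpyInst *MCI) {
  return {MCI->getDest(), MCI->getSource(), MCI->getLength(),
          MCI->getDestAlign(), MCI->getSourceAlign()};
}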
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
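A small illustrative sketch of the PHINode API above (names and block layout are assumptions): build a counter PHI in a loop header with an initial value from the preheader and an incremented value from the latch.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: in real code the incremented value is usually created
// after the PHI; it is passed in here for brevity.
static PHINode *createCounterPhi(BasicBlock *Header, BasicBlock *Preheader,
                                 BasicBlock *Latch, Value *Init, Value *Next) {
  // Reserve two incoming edges and insert the PHI at the top of the header.
  PHINode *Phi = PHINode::Create(Init->getType(), /*NumReservedValues=*/2,
                                 "counter", Header->begin());
  Phi->addIncoming(Init, Preheader);
  Phi->addIncoming(Next, Latch);
  return Phi;
}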
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
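A hedged sketch of the SCEVAddRecExpr/SCEVConstant accessors above, under an assumed helper name: check that a pointer evolves as a simple affine recurrence {Start,+,Stride} in the given loop and extract a constant stride when there is one.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

// Hypothetical helper: operand 1 of an affine AddRec is its step (stride).
static const SCEVConstant *getConstantStride(ScalarEvolution &SE, Value *Ptr,
                                             const Loop *L) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return nullptr;
  return dyn_cast<SCEVConstant>(AR->getOperand(1));
}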
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
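A minimal sketch of the ScalarEvolution queries listed above: ask for a loop's backedge-taken count and bail out when it is unknown, roughly the precondition a countable-loop rewrite needs. The helper name is an assumption.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

// Hypothetical helper: returns null when SCEV cannot produce a usable count.
static const SCEV *getUsableBackedgeCount(ScalarEvolution &SE, const Loop *L) {
  if (!SE.hasLoopInvariantBackedgeTakenCount(L))
    return nullptr;
  const SCEV *BECount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount))
    return nullptr;
  return BECount;
}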
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop: checks the loop body & header for the possibility of may-throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is computed contains an instruction that may throw ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
unsigned getAtomicMemIntrinsicMaxElementSize() const
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
TargetCostKind
The kind of cost model.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
@ TCC_Basic
The cost of a typical 'add' instruction.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match only the specific value given.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
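A hedged sketch of the PatternMatch matchers listed above (the helper name is an assumption): test whether a value computes "X & (X - 1)", the clear-lowest-set-bit step that popcount-style idioms are built from, accepting either operand order of the And and both the "add X, -1" and "sub X, 1" spellings.

#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical matcher: binds X to the repeated operand on success.
static bool isClearLowestSetBit(Value *V, Value *&X) {
  Value *A, *B;
  if (!match(V, m_And(m_Value(A), m_Value(B))))
    return false;
  if (match(B, m_Add(m_Specific(A), m_AllOnes())) ||
      match(B, m_Sub(m_Specific(A), m_One()))) {
    X = A;
    return true;
  }
  if (match(A, m_Add(m_Specific(B), m_AllOnes())) ||
      match(A, m_Sub(m_Specific(B), m_One()))) {
    X = B;
    return true;
  }
  return false;
}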
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on the target and also that, in the current Module, it is a...
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)
Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.
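An illustrative sketch of the TargetLibraryInfo helpers above, with an assumed helper name: only declare a call to the C library's memset if the target actually provides it, mirroring how libcall rewrites are guarded. The size type is approximated with the pointer-sized integer type for brevity.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

// Hypothetical helper: returns an empty FunctionCallee when memset cannot be
// emitted for this target/module.
static FunctionCallee getMemsetDecl(Module *M, const TargetLibraryInfo &TLI,
                                    IRBuilder<> &Builder) {
  if (!isLibFuncEmittable(M, &TLI, LibFunc_memset))
    return FunctionCallee();
  Type *SizeTy = M->getDataLayout().getIntPtrType(M->getContext());
  FunctionType *FTy = FunctionType::get(
      Builder.getPtrTy(), {Builder.getPtrTy(), Builder.getInt32Ty(), SizeTy},
      /*isVarArg=*/false);
  return getOrInsertLibFunc(M, TLI, LibFunc_memset, FTy, AttributeList());
}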
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ ModRef
The access may reference and may modify the value stored in memory.
@ Mod
The access may modify the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
std::optional< DecomposedBitTest > decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, bool LookThroughTrunc=true, bool AllowNonZeroC=false)
Decompose an icmp into the form ((X & Mask) pred C) if possible.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
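A brief sketch of isBytewiseValue from above, under an assumed helper name: a store is a memset candidate when the stored value is a splat of a single byte.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical check: returns the i8 splat value (possibly non-constant) or
// nullptr when the stored value is not bytewise-representable.
static Value *getMemsetByte(StoreInst *SI, const DataLayout &DL) {
  return isBytewiseValue(SI->getValueOperand(), DL);
}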
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * TBAAStruct
The tag for type-based alias analysis (tbaa struct).
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * TBAA
The tag for type-based alias analysis.
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
MDNode * NoAlias
The tag specifying the noalias scope.
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
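A hedged sketch of the AAMDNodes helpers above (the helper name is an assumption): combine the metadata of a load and a store being folded into a memcpy, then widen it to an unknown length, which conservatively drops tags such as !tbaa.struct that only describe a fixed-size access.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Hypothetical helper: -1 means "unknown length" for extendTo().
static AAMDNodes mergeAATagsForMemcpy(const LoadInst *LI, const StoreInst *SI) {
  AAMDNodes AATags = LI->getAAMetadata().merge(SI->getAAMetadata());
  return AATags.extendTo(-1);
}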
This struct is a compact representation of a valid (non-zero power of two) alignment.
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
static bool Memcpy
When true, Memcpy is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)