LLVM: lib/CodeGen/ExpandMemCmp.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
34#include
35
36using namespace llvm;
38
39namespace llvm {
41}
42
43#define DEBUG_TYPE "expand-memcmp"
44
45STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
46STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
48 "Number of memcmp calls with size greater than max size");
49STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
50
53 cl::desc("The number of loads per basic block for inline expansion of "
54 "memcmp that is only being compared against zero."));
55
58 cl::desc("Set maximum number of loads used in expanded memcmp"));
59
61 "max-loads-per-memcmp-opt-size", cl::Hidden,
62 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
63
64namespace {
65
66
67
68
// MemCmpExpansion: expands a memcmp/bcmp call with a constant, known size into
// an inline sequence of loads and compares (one or more basic blocks), driven
// by the target's MemCmpExpansionOptions.
//
// NOTE(review): this is a lossy extraction — the leading number on each line
// is the original source line, and gaps in that numbering (71, 85, 90, 96,
// 102, ...) mark members/statements that were dropped (e.g. the ResultBlock
// basic-block pointer, the EndBlock member, the IRBuilder member, and the
// LoadEntry ctor initializer list and Offset field). Restore from upstream.
69class MemCmpExpansion {
  // Final "compute the ordered result" block: PHIs merge the load values that
  // reached it so a single ult-compare can produce -1/+1.
70 struct ResultBlock {
72 PHINode *PhiSrc1 = nullptr;
73 PHINode *PhiSrc2 = nullptr;
74
75 ResultBlock() = default;
76 };
77
  // The memcmp call being expanded, and the constant byte count compared.
78 CallInst *const CI = nullptr;
79 ResultBlock ResBlock;
80 const uint64_t Size;
81 unsigned MaxLoadSize = 0;
82 uint64_t NumLoadsNonOneByte = 0;
83 const uint64_t NumLoadsPerBlockForZeroCmp;
84 std::vector<BasicBlock *> LoadCmpBlocks;
  // PHI in the end block that merges the per-block results into the final
  // i32 memcmp return value.
86 PHINode *PhiRes = nullptr;
  // True when every user of CI only tests the result against zero, which
  // enables the cheaper equality-only expansion.
87 const bool IsUsedForZeroCmp;
88 const DataLayout &DL;
  // Optional dominator-tree updater; every emitted branch reports its edges
  // through this when non-null.
89 DomTreeUpdater *DTU = nullptr;
91
92
93
  // One entry of the expansion plan: a load of LoadSize bytes at Offset bytes
  // from both source pointers. (Ctor body and Offset field elided here.)
94 struct LoadEntry {
95 LoadEntry(unsigned LoadSize, uint64_t Offset)
97 }
98
99
100 unsigned LoadSize;
101
103 };
104 using LoadEntryVector = SmallVector<LoadEntry, 8>;
105 LoadEntryVector LoadSequence;
106
107 void createLoadCmpBlocks();
108 void createResultBlock();
109 void setupResultBlockPHINodes();
110 void setupEndBlockPHINodes();
111 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
112 void emitLoadCompareBlock(unsigned BlockIndex);
113 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
114 unsigned &LoadIndex);
115 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
116 void emitMemCmpResultBlock();
117 Value *getMemCmpExpansionZeroCase();
118 Value *getMemCmpEqZeroOneBlock();
119 Value *getMemCmpOneBlock();
  // The two values loaded (and byte-swapped/extended as requested) from the
  // lhs and rhs memcmp operands.
120 struct LoadPair {
121 Value *Lhs = nullptr;
122 Value *Rhs = nullptr;
123 };
124 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,
125 Type *CmpSizeType, unsigned OffsetBytes);
126
  // NOTE(review): ArrayRef's <unsigned> template argument was stripped by the
  // extraction here and below.
127 static LoadEntryVector
128 computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef LoadSizes,
129 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
130 static LoadEntryVector
131 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
132 unsigned MaxNumLoads,
133 unsigned &NumLoadsNonOneByte);
134
  // Merges a contiguous trailing pair of loads into one load whose size is in
  // Options.AllowedTailExpansions (non-zero-compare expansions only).
135 static void optimiseLoadSequence(
136 LoadEntryVector &LoadSequence,
137 const TargetTransformInfo::MemCmpExpansionOptions &Options,
138 bool IsUsedForZeroCmp);
139
140public:
141 MemCmpExpansion(CallInst *CI, uint64_t Size,
142 const TargetTransformInfo::MemCmpExpansionOptions &Options,
143 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
144 DomTreeUpdater *DTU);
145
146 unsigned getNumBlocks();
147 uint64_t getNumLoads() const { return LoadSequence.size(); }
148
  // Entry point: emits the whole expansion and returns the replacement value
  // for the call (or nullptr when users were rewritten in place).
149 Value *getMemCmpExpansion();
150};
151
152MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
153 uint64_t Size, llvm::ArrayRef LoadSizes,
154 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
155 NumLoadsNonOneByte = 0;
156 LoadEntryVector LoadSequence;
158 while (Size && !LoadSizes.empty()) {
159 const unsigned LoadSize = LoadSizes.front();
160 const uint64_t NumLoadsForThisSize = Size / LoadSize;
161 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
162
163
164
165
166 return {};
167 }
168 if (NumLoadsForThisSize > 0) {
169 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
170 LoadSequence.push_back({LoadSize, Offset});
172 }
173 if (LoadSize > 1)
174 ++NumLoadsNonOneByte;
176 }
178 }
179 return LoadSequence;
180}
181
182MemCmpExpansion::LoadEntryVector
183MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
184 const unsigned MaxLoadSize,
185 const unsigned MaxNumLoads,
186 unsigned &NumLoadsNonOneByte) {
187
188 if (Size < 2 || MaxLoadSize < 2)
189 return {};
190
191
192
193 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
194 assert(NumNonOverlappingLoads && "there must be at least one load");
195
196
197 Size = Size - NumNonOverlappingLoads * MaxLoadSize;
198
199
200 if (Size == 0)
201 return {};
202
203
204 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
205 return {};
206
207
208 LoadEntryVector LoadSequence;
210 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
211 LoadSequence.push_back({MaxLoadSize, Offset});
212 Offset += MaxLoadSize;
213 }
214
215
216 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
217 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
218 NumLoadsNonOneByte = 1;
219 return LoadSequence;
220}
221
222void MemCmpExpansion::optimiseLoadSequence(
223 LoadEntryVector &LoadSequence,
224 const TargetTransformInfo::MemCmpExpansionOptions &Options,
225 bool IsUsedForZeroCmp) {
226
227
228
229
230 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())
231 return;
232
233 while (LoadSequence.size() >= 2) {
234 auto Last = LoadSequence[LoadSequence.size() - 1];
235 auto PreLast = LoadSequence[LoadSequence.size() - 2];
236
237
238 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
239 break;
240
241 auto LoadSize = Last.LoadSize + PreLast.LoadSize;
242 if (find(Options.AllowedTailExpansions, LoadSize) ==
243 Options.AllowedTailExpansions.end())
244 break;
245
246
247 LoadSequence.pop_back();
248 LoadSequence.pop_back();
249 LoadSequence.emplace_back(PreLast.Offset, LoadSize);
250 }
251}
252
253
254
255
256
257
258
259
260
261MemCmpExpansion::MemCmpExpansion(
262 CallInst *const CI, uint64_t Size,
263 const TargetTransformInfo::MemCmpExpansionOptions &Options,
264 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
265 DomTreeUpdater *DTU)
266 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
267 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
268 Builder(CI) {
270
272 while (!LoadSizes.empty() && LoadSizes.front() > Size) {
274 }
275 assert(!LoadSizes.empty() && "cannot load Size bytes");
276 MaxLoadSize = LoadSizes.front();
277
278 unsigned GreedyNumLoadsNonOneByte = 0;
279 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
280 GreedyNumLoadsNonOneByte);
281 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
282 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
283
284
285 if (Options.AllowOverlappingLoads &&
286 (LoadSequence.empty() || LoadSequence.size() > 2)) {
287 unsigned OverlappingNumLoadsNonOneByte = 0;
288 auto OverlappingLoads = computeOverlappingLoadSequence(
289 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
290 if (!OverlappingLoads.empty() &&
291 (LoadSequence.empty() ||
292 OverlappingLoads.size() < LoadSequence.size())) {
293 LoadSequence = OverlappingLoads;
294 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
295 }
296 }
297 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
298 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);
299}
300
301unsigned MemCmpExpansion::getNumBlocks() {
302 if (IsUsedForZeroCmp)
303 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
304 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
305 return getNumLoads();
306}
307
308void MemCmpExpansion::createLoadCmpBlocks() {
309 for (unsigned i = 0; i < getNumBlocks(); i++) {
311 EndBlock->getParent(), EndBlock);
312 LoadCmpBlocks.push_back(BB);
313 }
314}
315
316void MemCmpExpansion::createResultBlock() {
318 EndBlock->getParent(), EndBlock);
319}
320
321MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
322 Type *BSwapSizeType,
323 Type *CmpSizeType,
324 unsigned OffsetBytes) {
325
330 if (OffsetBytes > 0) {
331 auto *ByteType = Type::getInt8Ty(CI->getContext());
332 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
333 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
336 }
337
338
339 Value *Lhs = nullptr;
342 if (!Lhs)
343 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
344
345 Value *Rhs = nullptr;
348 if (!Rhs)
349 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
350
351
352 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
353 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);
354 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);
355 }
356
357
358 if (BSwapSizeType) {
360 CI->getModule(), Intrinsic::bswap, BSwapSizeType);
361 Lhs = Builder.CreateCall(Bswap, Lhs);
362 Rhs = Builder.CreateCall(Bswap, Rhs);
363 }
364
365
366 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {
367 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
368 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
369 }
370 return {Lhs, Rhs};
371}
372
373
374
375
376
377void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
378 unsigned OffsetBytes) {
379 BasicBlock *BB = LoadCmpBlocks[BlockIndex];
381 const LoadPair Loads =
382 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr,
383 Type::getInt32Ty(CI->getContext()), OffsetBytes);
384 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
385
387
388 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
389
390
392 ConstantInt::get(Diff->getType(), 0));
393 BranchInst *CmpBr =
395 Builder.Insert(CmpBr);
396 if (DTU)
398 {{DominatorTree::Insert, BB, EndBlock},
399 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
400 } else {
401
403 Builder.Insert(CmpBr);
404 if (DTU)
405 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
406 }
407}
408
409
410
411
// Emits, for an equality-only (zero-compare) block, up to
// NumLoadsPerBlockForZeroCmp load pairs, XORs each pair, ORs the XORs
// together in a balanced tree, and returns an i1 that is true iff any pair
// differed. LoadIndex is advanced past the consumed load entries.
//
// NOTE(review): gaps in the embedded line numbers (424-428, 434, 439, 450,
// 457, 475) mark elided statements — the SetInsertPoint calls, the
// MaxLoadType width computation, the single-load direct ICmp, the pairwise
// CreateOr, and the final CreateICmpNE — restore from upstream before use.
412Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
413 unsigned &LoadIndex) {
414 assert(LoadIndex < getNumLoads() &&
415 "getCompareLoadPairs() called with no remaining loads");
416 std::vector<Value *> XorList, OrList;
417 Value *Diff = nullptr;
418
  // Consume at most one block's worth of the remaining load entries.
419 const unsigned NumLoads =
420 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
421
422
  // Single-block expansions insert at the call site; otherwise into the
  // block being filled (elided SetInsertPoint calls).
423 if (LoadCmpBlocks.empty())
425 else
427
429
430
431
  // With several loads, everything is widened to the largest load's width so
  // the XOR/OR tree operates on a single type; a single load compares
  // directly and needs no common type.
432 IntegerType *const MaxLoadType =
433 NumLoads == 1 ? nullptr
435
436 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
437 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
438 const LoadPair Loads = getLoadPair(
440 MaxLoadType, CurLoadEntry.Offset);
441
442 if (NumLoads != 1) {
      // Equality reduces to "XOR is zero"; collect the per-pair XORs.
443
444
445 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
446 Diff = Builder.CreateZExt(Diff, MaxLoadType);
447 XorList.push_back(Diff);
448 } else {
      // Single pair: compare directly (elided CreateICmpNE into Cmp).
449
451 }
452 }
453
  // OR adjacent values pairwise, carrying an odd leftover through unchanged.
454 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
455 std::vector<Value *> OutList;
456 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
458 OutList.push_back(Or);
459 }
460 if (InList.size() % 2 != 0)
461 OutList.push_back(InList.back());
462 return OutList;
463 };
464
465 if (!Cmp) {
    // Reduce the XOR list to one value with a balanced OR tree, then test it
    // against zero (elided final CreateICmpNE).
466
467 OrList = pairWiseOr(XorList);
468
469
470 while (OrList.size() != 1) {
471 OrList = pairWiseOr(OrList);
472 }
473
474 assert(Diff && "Failed to find comparison diff");
476 }
477
478 return Cmp;
479}
480
// Emits one equality-only block: builds the multi-load "any difference" i1
// via getCompareLoadPairs, then branches to res_block on a difference or to
// the next block (or EndBlock) otherwise. The last block also feeds the
// "all equal" zero constant into PhiRes.
//
// NOTE(review): elided lines 490-493 held the BB local and the conditional
// BranchInst creation (ResBlock.BB vs NextBB on Cmp); line 503 held the Zero
// constant. Restore from upstream before use.
481void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
482 unsigned &LoadIndex) {
483 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
484
  // Fall-through target: next compare block, or the end block if this is the
  // last one.
485 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
486 ? EndBlock
487 : LoadCmpBlocks[BlockIndex + 1];
488
489
494 Builder.Insert(CmpBr);
495 if (DTU)
496 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
497 {DominatorTree::Insert, BB, NextBB}});
498
499
500
  // Reaching the end from the final block means every pair matched: the
  // memcmp result is 0 on this edge.
501
502 if (BlockIndex == LoadCmpBlocks.size() - 1) {
504 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
505 }
506}
507
508
509
510
511
512
513
514
515
516
// Emits one ordered-comparison block: loads both operands (byte-swapped on
// little-endian targets so integer order matches memcmp's lexicographic
// order), feeds them into res_block's PHIs, and branches to res_block on
// inequality or onward on equality. 1-byte entries delegate to the cheaper
// byte-block form.
//
// NOTE(review): elided lines (527, 530-531, 533-534, 538, 556-559, 569) held
// the LoadSizeType/BSwapSizeType/MaxLoadType constructions, the
// SetInsertPoint, the conditional BranchInst creation and the Zero constant.
// Restore from upstream before use.
517void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
518
519 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
520
  // Single bytes are compared by subtraction; no bswap or PHI feed needed.
521 if (CurLoadEntry.LoadSize == 1) {
522 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
523 return;
524 }
525
526 Type *LoadSizeType =
528 Type *BSwapSizeType =
529 DL.isLittleEndian()
532 : nullptr;
  // MaxLoadType is wide enough for the widest (power-of-two-rounded) load so
  // the res_block PHIs have a single common type.
535 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
536 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
537
539
540 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
541 CurLoadEntry.Offset);
542
543
  // Ordered expansions must remember which values reached res_block so it
  // can compute -1/+1 there.
544
545 if (!IsUsedForZeroCmp) {
546 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
547 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
548 }
549
550 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
551 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
552 ? EndBlock
553 : LoadCmpBlocks[BlockIndex + 1];
554
  // Equal -> keep comparing (or finish with 0); unequal -> res_block.
555
560 Builder.Insert(CmpBr);
561 if (DTU)
562 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
563 {DominatorTree::Insert, BB, ResBlock.BB}});
564
565
566
  // The final block's fall-through edge means full equality: result 0.
567
568 if (BlockIndex == LoadCmpBlocks.size() - 1) {
570 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
571 }
572}
573
574
575
576
// Fills in res_block. For zero-compares, any arrival means "not equal", so a
// constant 1 feeds PhiRes. For ordered compares, an unsigned less-than on the
// (byte-swapped) PHI values selects -1 or 1.
//
// NOTE(review): elided lines (581-582, 584-585, 591-592, 597-598, 600-601,
// 603-604) held the SetInsertPoint calls, PhiRes->addIncoming(Res, ...), the
// BranchInst::Create(EndBlock) constructions and the select of
// all-ones/one. Restore from upstream before use.
577void MemCmpExpansion::emitMemCmpResultBlock() {
578
579
580 if (IsUsedForZeroCmp) {
    // Equality-only: reaching here at all means a difference was found.
583 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
586 Builder.Insert(NewBr);
587 if (DTU)
588 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
589 return;
590 }
593
  // Ordered result: PhiSrc1 < PhiSrc2 (unsigned, post-bswap) iff the first
  // differing byte of lhs is smaller, i.e. memcmp < 0.
594 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
595 ResBlock.PhiSrc2);
596
599 ConstantInt::get(Builder.getInt32Ty(), 1));
602
605 Builder.Insert(NewBr);
606 if (DTU)
607 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
608}
609
// Creates res_block's two PHIs that will merge the loaded values from every
// multi-byte compare block (hence NumLoadsNonOneByte reserved slots each).
//
// NOTE(review): elided lines 611-612 held the MaxLoadType construction and
// the SetInsertPoint(ResBlock.BB); restore from upstream before use.
610void MemCmpExpansion::setupResultBlockPHINodes() {
613
614 ResBlock.PhiSrc1 =
615 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
616 ResBlock.PhiSrc2 =
617 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
618}
619
// Creates the i32 result PHI at the top of the end block; incoming values
// are added later by the emit* methods (2 is just the reserved-slot hint).
//
// NOTE(review): elided line 621 held the SetInsertPoint into EndBlock's
// first insertion point; restore from upstream before use.
620void MemCmpExpansion::setupEndBlockPHINodes() {
622 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
623}
624
625Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
626 unsigned LoadIndex = 0;
627
628
629 for (unsigned I = 0; I < getNumBlocks(); ++I) {
630 emitLoadCompareBlockMultipleLoads(I, LoadIndex);
631 }
632
633 emitMemCmpResultBlock();
634 return PhiRes;
635}
636
637
638
639
// Single-block zero-compare expansion: all loads fit in one block, so the
// "any difference" i1 from getCompareLoadPairs is the whole answer.
//
// NOTE(review): elided line 644 held the return (zero-extension of Cmp to
// i32); restore from upstream before use.
640Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
641 unsigned LoadIndex = 0;
642 Value *Cmp = getCompareLoadPairs(0, LoadIndex);
643 assert(LoadIndex == getNumLoads() && "some entries were not consumed");
645}
646
647
648
649
650
651
// Single-load ordered expansion. Sizes 1-2 are handled by widening and
// subtracting. Larger sizes load once, byte-swap on little-endian, and — when
// the call's only user is a recognizable sign comparison (elided match()
// patterns around lines 685-700) — replace that user directly with a signed
// icmp; otherwise they fall through to emitting a ucmp-style result.
//
// NOTE(review): heavily elided — lines 654-660 (LoadSizeType and the type
// constructions), 666 (CmpSizeType argument), 685-700 (the m_ICmp pattern
// matches selecting Pred), 704-709 (the signed-compare rewrite and the
// sext/zext of the result), and 715 (the final CreateIntrinsic for the
// ordered result) must be restored from upstream before use.
652Value *MemCmpExpansion::getMemCmpOneBlock() {
  // memcmp compares bytes in address order; on little-endian targets a
  // multi-byte integer compare needs a bswap first (1 byte never does).
653 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
655 Type *BSwapSizeType =
657 : nullptr;
658 Type *MaxLoadType =
661
662
  // Sizes 1 and 2 fit the "zero-extend then subtract" trick: the i32
  // difference already has the right sign and zero-ness.
663
664 if (Size == 1 || Size == 2) {
665 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
667 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
668 }
669
670 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
671 0);
672
673
  // If the single user is `memcmp(...) <cmp> 0` (or an equivalent pattern),
  // pick the signed predicate that makes the whole thing one icmp.
674
675
678 CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
679 bool NeedsZExt = false;
680
681
682
683
684
688 Pred = ICmpInst::ICMP_SLT;
689 NeedsZExt = true;
692
693 Pred = ICmpInst::ICMP_SGE;
696
697 Pred = ICmpInst::ICMP_SLE;
698 } else {
699
701 }
702
  // Rewrite the matched user in place and return nullptr so the caller knows
  // the call's uses are already gone.
703 if (ICmpInst::isSigned(Pred)) {
705 Loads.Lhs, Loads.Rhs);
707 UI->replaceAllUsesWith(Result);
708 UI->eraseFromParent();
710 return nullptr;
711 }
712 }
713
714
716 {Loads.Lhs, Loads.Rhs});
717}
718
719
720
// Top-level driver: for multi-block plans, splits the block at the call to
// create EndBlock, sets up the PHIs and res_block, creates the compare
// blocks, and reroutes StartBlock into the first of them; then dispatches to
// the right emitter and returns the value that replaces the memcmp call.
//
// NOTE(review): elided lines (724, 741, 747) held the StartBlock local, the
// retargeting of StartBlock's terminator to LoadCmpBlocks[0], and the
// Builder debug-location setup; restore from upstream before use.
721Value *MemCmpExpansion::getMemCmpExpansion() {
722
723 if (getNumBlocks() != 1) {
725 EndBlock = SplitBlock(StartBlock, CI, DTU, nullptr,
726 nullptr, "endblock");
727 setupEndBlockPHINodes();
728 createResultBlock();
729
730
    // res_block PHIs are only needed when an ordered (-1/0/1) result must be
    // computed; zero-compares feed a constant instead.
731
732
733
734 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
735
736
737 createLoadCmpBlocks();
738
    // StartBlock now falls into the first compare block rather than straight
    // into EndBlock; tell the dominator tree about both edge changes.
739
740
742 if (DTU)
743 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
744 {DominatorTree::Delete, StartBlock, EndBlock}});
745 }
746
748
749 if (IsUsedForZeroCmp)
750 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
751 : getMemCmpExpansionZeroCase();
752
753 if (getNumBlocks() == 1)
754 return getMemCmpOneBlock();
755
756 for (unsigned I = 0; I < getNumBlocks(); ++I) {
757 emitLoadCompareBlock(I);
758 }
759
760 emitMemCmpResultBlock();
761 return PhiRes;
762}
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
// Attempts to expand one memcmp/bcmp call. Returns false (leaving the call
// alone) when: expansion is disabled, the size is not a constant, the size is
// zero, the target declines, or the plan exceeds the load budget. On success
// the call has been replaced and true is returned.
//
// NOTE(review): badly elided — line 844 (the "disable expansion" early-out
// condition), 848 (the dyn_cast<ConstantInt> of the size operand), 861-863
// (isOnlyUsedInZeroEqualityComparison / bcmp handling and the
// enableMemCmpExpansion call whose result feeds the mangled
// `if () return false;` on line 865), 866-875 (OptForSize computation and the
// opt-size MaxNumLoads clamping from the cl::opts), and 887-890 (the
// getMemCmpExpansion call plus call replacement/erasure). Restore from
// upstream before use.
837static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
838 const TargetLowering *TLI, const DataLayout *DL,
839 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
840 DomTreeUpdater *DTU, const bool IsBCmp) {
841 NumMemCmpCalls++;
842
843
845 return false;
846
  // Only constant sizes can be expanded at compile time.
847
849 if (!SizeCast) {
850 NumMemCmpNotConstant++;
851 return false;
852 }
853 const uint64_t SizeVal = SizeCast->getZExtValue();
854
  // memcmp(p, q, 0) is always 0; other passes fold it, nothing to do here.
855 if (SizeVal == 0) {
856 return false;
857 }
858
859
860 const bool IsUsedForZeroCmp =
864 IsUsedForZeroCmp);
865 if () return false;
866
869
870 if (OptForSize &&
873
876
877 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
878
  // An empty plan means the budget was exceeded; keep the library call.
879
880 if (Expansion.getNumLoads() == 0) {
881 NumMemCmpGreaterThanMax++;
882 return false;
883 }
884
885 NumMemCmpInlined++;
886
888
891 }
892
893 return true;
894}
895
896
// Forward declarations so the legacy pass below can call the shared
// implementation defined after it.
897static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
898 const TargetTransformInfo *TTI, const TargetLowering *TL,
899 const DataLayout &DL, ProfileSummaryInfo *PSI,
900 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
901
902static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
903 const TargetTransformInfo *TTI,
904 const TargetLowering *TL,
905 ProfileSummaryInfo *PSI,
906 BlockFrequencyInfo *BFI, DominatorTree *DT);
907
// Legacy-PM wrapper: gathers the analyses (TLI, TTI, PSI, lazy BFI, optional
// DT) and the TargetLowering from the TargetPassConfig, then defers to the
// shared runImpl.
//
// NOTE(review): the extraction stripped every template argument from the
// getAnalysis<...>() / getAnalysisIfAvailable<...>() / addRequired<...>()
// calls and dropped lines 913 (pass initialization), 916 (the runOnFunction
// signature), 931 (the PSI->hasProfileSummary() ? ... guard for BFI) and
// 946-947 (the remaining analysis-usage entries). Restore from upstream.
908class ExpandMemCmpLegacyPass : public FunctionPass {
909public:
910 static char ID;
911
912 ExpandMemCmpLegacyPass() : FunctionPass(ID) {
914 }
915
917 if (skipFunction(F)) return false;
918
    // Without a TargetPassConfig there is no TargetLowering, so bail.
919 auto *TPC = getAnalysisIfAvailable();
920 if (!TPC) {
921 return false;
922 }
923 const TargetLowering* TL =
924 TPC->getTM().getSubtargetImpl(F)->getTargetLowering();
925
926 const TargetLibraryInfo *TLI =
927 &getAnalysis().getTLI(F);
928 const TargetTransformInfo *TTI =
929 &getAnalysis().getTTI(F);
930 auto *PSI = &getAnalysis().getPSI();
932 &getAnalysis().getBFI() :
933 nullptr;
    // DT is optional; when present it is kept up to date via DomTreeUpdater.
934 DominatorTree *DT = nullptr;
935 if (auto *DTWP = getAnalysisIfAvailable())
936 DT = &DTWP->getDomTree();
937 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
938 return !PA.areAllPreserved();
939 }
940
941private:
942 void getAnalysisUsage(AnalysisUsage &AU) const override {
943 AU.addRequired();
944 AU.addRequired();
945 AU.addRequired();
948 FunctionPass::getAnalysisUsage(AU);
949 }
950};
951
// Scans one block for a call to memcmp/bcmp and expands the first one found.
// Returns true if a call was expanded — the CFG has changed, so the caller
// must restart its iteration rather than continue in this block.
//
// NOTE(review): elided lines 957 (the dyn_cast<CallInst> of &I) and 962 (the
// called-function/TLI->getLibFunc condition feeding Func) must be restored
// from upstream.
952bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
953 const TargetTransformInfo *TTI, const TargetLowering *TL,
954 const DataLayout &DL, ProfileSummaryInfo *PSI,
955 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
956 for (Instruction &I : BB) {
957
958 if (!CI) {
959 continue;
960 }
961 LibFunc Func;
963 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
964 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
965 return true;
966 }
967 }
968 return false;
969}
970
// Shared implementation for both pass managers: repeatedly scans the
// function, restarting from the first block after every expansion (each
// expansion rewrites the CFG), then cleans up and reports what is preserved.
//
// NOTE(review): the extraction stripped <DomTreeUpdater> from std::optional
// on line 975, dropped line 993 (the per-block SimplifyInstructionsInBlock
// cleanup call) and line 995 (return PreservedAnalyses::all()), and the
// preserve<...>() template argument on line 997 (DominatorTreeAnalysis).
971PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
972 const TargetTransformInfo *TTI,
973 const TargetLowering *TL, ProfileSummaryInfo *PSI,
974 BlockFrequencyInfo *BFI, DominatorTree *DT) {
975 std::optional DTU;
976 if (DT)
977 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
978
979 const DataLayout& DL = F.getDataLayout();
980 bool MadeChanges = false;
981 for (auto BBIt = F.begin(); BBIt != F.end();) {
982 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
983 MadeChanges = true;
      // The expansion split blocks and invalidated BBIt; restart the scan
      // from the beginning of the function.
984
985
986 BBIt = F.begin();
987 } else {
988 ++BBIt;
989 }
990 }
991 if (MadeChanges)
992 for (BasicBlock &BB : F)
994 if (!MadeChanges)
996 PreservedAnalyses PA;
997 PA.preserve();
998 return PA;
999}
1000
1001}
1002
1005 const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
1009 .getCachedResult(*F.getParent());
1012 : nullptr;
1014
1015 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);
1016}
1017
1018char ExpandMemCmpLegacyPass::ID = 0;
1020 "Expand memcmp() to load/stores", false, false)
1028
1030 return new ExpandMemCmpLegacyPass();
1031}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool runOnFunction(Function &F, bool PostInlining)
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
front - Get the first element.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Value * getArgOperand(unsigned i) const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition ExpandMemCmp.cpp:1003
FunctionPass class - This class is used to implement most global optimizations.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
bool hasProfileSummary() const
Returns true if profile summary is available.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Wrapper pass for TargetTransformInfo.
LLVM_ABI MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
const ParentTy * getParent() const
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI void initializeExpandMemCmpLegacyPassPass(PassRegistry &)
LLVM_ABI FunctionPass * createExpandMemCmpLegacyPass()
Definition ExpandMemCmp.cpp:1029
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Or
Bitwise or logical OR of integers.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
unsigned NumLoadsPerBlock