LLVM: lib/CodeGen/ExpandMemCmp.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
33#include
34
35using namespace llvm;
37
38namespace llvm {
40}
41
42#define DEBUG_TYPE "expand-memcmp"
43
44STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
45STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
47 "Number of memcmp calls with size greater than max size");
48STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
49
52 cl::desc("The number of loads per basic block for inline expansion of "
53 "memcmp that is only being compared against zero."));
54
57 cl::desc("Set maximum number of loads used in expanded memcmp"));
58
60 "max-loads-per-memcmp-opt-size", cl::Hidden,
61 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
62
63namespace {
64
65
66
67
// Helper class that expands a memcmp/bcmp call of constant size into an
// inline sequence of loads and compares.
// NOTE(review): this is a lossy doxygen scrape — each line starts with its
// original line number, and several member declarations (CI, Size, EndBlock,
// DL, DTU, Builder, LoadEntryVector typedef, LoadEntry fields) were dropped
// by the extraction. Do not treat this text as compilable.
68class MemCmpExpansion {
  // Holds the single "result" block and the PHIs that feed it the two
  // differing loaded values when a byte difference is found.
 69 struct ResultBlock {
 71 PHINode *PhiSrc1 = nullptr;
 72 PHINode *PhiSrc2 = nullptr;
 73
 74 ResultBlock() = default;
 75 };
 76
 78 ResultBlock ResBlock;
 80 unsigned MaxLoadSize = 0;
 81 uint64_t NumLoadsNonOneByte = 0;
 82 const uint64_t NumLoadsPerBlockForZeroCmp;
 83 std::vector<BasicBlock *> LoadCmpBlocks;
 85 PHINode *PhiRes = nullptr;
 86 const bool IsUsedForZeroCmp;
 90
 91
 92
  // One load in the expansion: a size and a byte offset into the buffers.
  // NOTE(review): the constructor body and the Offset field were dropped
  // by the extraction; only LoadSize is visible here.
 93 struct LoadEntry {
 96 }
 97
 98
 99 unsigned LoadSize;
 100
 102 };
 104 LoadEntryVector LoadSequence;
 105
 106 void createLoadCmpBlocks();
 107 void createResultBlock();
 108 void setupResultBlockPHINodes();
 109 void setupEndBlockPHINodes();
 110 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
 111 void emitLoadCompareBlock(unsigned BlockIndex);
 112 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
 113 unsigned &LoadIndex);
 114 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
 115 void emitMemCmpResultBlock();
 116 Value *getMemCmpExpansionZeroCase();
 117 Value *getMemCmpEqZeroOneBlock();
 118 Value *getMemCmpOneBlock();
  // A pair of loaded values (lhs/rhs buffer) ready to be compared.
 119 struct LoadPair {
 120 Value *Lhs = nullptr;
 121 Value *Rhs = nullptr;
 122 };
 123 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,
 124 Type *CmpSizeType, unsigned OffsetBytes);
 125
  // Static planners that turn (Size, allowed load sizes) into a LoadSequence.
 126 static LoadEntryVector
 128 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
 129 static LoadEntryVector
 130 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
 131 unsigned MaxNumLoads,
 132 unsigned &NumLoadsNonOneByte);
 133
 134 static void optimiseLoadSequence(
 135 LoadEntryVector &LoadSequence,
 137 bool IsUsedForZeroCmp);
 138
 139public:
 142 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
 144
 145 unsigned getNumBlocks();
 146 uint64_t getNumLoads() const { return LoadSequence.size(); }
 147
 148 Value *getMemCmpExpansion();
 149};
150
// Greedily covers Size bytes with the largest allowed load sizes first.
// Returns an empty sequence if more than MaxNumLoads loads would be needed.
// NOTE(review): extraction dropped the function header (return type, Size and
// LoadSizes parameters) plus the Offset declaration/advance and the
// LoadSizes.drop_front()/Size %= LoadSize steps between the visible lines.
 153 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
 154 NumLoadsNonOneByte = 0;
 155 LoadEntryVector LoadSequence;
 157 while (Size && !LoadSizes.empty()) {
 158 const unsigned LoadSize = LoadSizes.front();
 159 const uint64_t NumLoadsForThisSize = Size / LoadSize;
 160 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
 161
 162
 163
 164
    // Budget exceeded: signal "don't expand" with an empty sequence.
 165 return {};
 166 }
 167 if (NumLoadsForThisSize > 0) {
 168 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
 169 LoadSequence.push_back({LoadSize, Offset});
 171 }
    // Any load wider than one byte is counted once per load size.
 172 if (LoadSize > 1)
 173 ++NumLoadsNonOneByte;
 175 }
 177 }
 178 return LoadSequence;
 179}
180
// Covers Size bytes with NumNonOverlappingLoads full-width loads plus one
// final MaxLoadSize load that overlaps the previous one, instead of a tail of
// smaller loads. Returns {} when not profitable/applicable.
// NOTE(review): the `uint64_t Offset = 0;` declaration between lines 207 and
// 209 was dropped by the extraction.
 182MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
 183 const unsigned MaxLoadSize,
 184 const unsigned MaxNumLoads,
 185 unsigned &NumLoadsNonOneByte) {
  // Overlap only makes sense for multi-byte sizes and multi-byte loads.
 186
 187 if (Size < 2 || MaxLoadSize < 2)
 188 return {};
 189
 190
 191
 192 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
 193 assert(NumNonOverlappingLoads && "there must be at least one load");
 194
 195
  // Remaining tail after the full-width loads.
 196 Size = Size - NumNonOverlappingLoads * MaxLoadSize;
 197
 198
  // No tail: the greedy sequence is already optimal.
 199 if (Size == 0)
 200 return {};
 201
 202
 203 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
 204 return {};
 205
 206
 207 LoadEntryVector LoadSequence;
 209 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
 210 LoadSequence.push_back({MaxLoadSize, Offset});
 211 Offset += MaxLoadSize;
 212 }
 213
 214
  // Final load is shifted back so it ends exactly at Size bytes.
 215 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
 216 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
 217 NumLoadsNonOneByte = 1;
 218 return LoadSequence;
 219}
220
// Merges the two trailing loads into one wider load when the combined width
// is in Options.AllowedTailExpansions; skipped for zero-equality compares.
// NOTE(review): the `const TargetTransformInfo::MemCmpExpansionOptions
// &Options` parameter line was dropped by the extraction — the body uses
// `Options` but it is not visible in the parameter list below.
 221void MemCmpExpansion::optimiseLoadSequence(
 222 LoadEntryVector &LoadSequence,
 224 bool IsUsedForZeroCmp) {
 225
 226
 227
 228
 229 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())
 230 return;
 231
 232 while (LoadSequence.size() >= 2) {
 233 auto Last = LoadSequence[LoadSequence.size() - 1];
 234 auto PreLast = LoadSequence[LoadSequence.size() - 2];
 235
 236
    // Only adjacent, contiguous loads can be merged.
 237 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
 238 break;
 239
 240 auto LoadSize = Last.LoadSize + PreLast.LoadSize;
 241 if (find(Options.AllowedTailExpansions, LoadSize) ==
 242 Options.AllowedTailExpansions.end())
 243 break;
 244
 245
    // Replace the last two entries with one merged entry.
 246 LoadSequence.pop_back();
 247 LoadSequence.pop_back();
 248 LoadSequence.emplace_back(PreLast.Offset, LoadSize);
 249 }
 250}
251
252
253
254
255
256
257
258
259
// Constructor: plans the load sequence for the given size and target options.
// Tries the greedy sequence first, then an overlapping-load sequence when the
// target allows it and it uses strictly fewer loads, and finally applies the
// tail-merge optimisation.
// NOTE(review): parameter lines (CallInst *CI, uint64_t Size, Options, DTU)
// and the `llvm::ArrayRef<unsigned> LoadSizes = ...` / `LoadSizes =
// LoadSizes.drop_front();` lines were dropped by the extraction.
 260MemCmpExpansion::MemCmpExpansion(
 263 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
 265 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
 266 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
 267 Builder(CI) {
 269
  // Skip load sizes wider than the total size being compared.
 271 while (!LoadSizes.empty() && LoadSizes.front() > Size) {
 273 }
 274 assert(!LoadSizes.empty() && "cannot load Size bytes");
 275 MaxLoadSize = LoadSizes.front();
 276
 277 unsigned GreedyNumLoadsNonOneByte = 0;
 278 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
 279 GreedyNumLoadsNonOneByte);
 280 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
 281 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
 282
 283
  // Prefer the overlapping plan only if it strictly reduces the load count
  // (or the greedy plan failed entirely).
 284 if (Options.AllowOverlappingLoads &&
 285 (LoadSequence.empty() || LoadSequence.size() > 2)) {
 286 unsigned OverlappingNumLoadsNonOneByte = 0;
 287 auto OverlappingLoads = computeOverlappingLoadSequence(
 288 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
 289 if (!OverlappingLoads.empty() &&
 290 (LoadSequence.empty() ||
 291 OverlappingLoads.size() < LoadSequence.size())) {
 292 LoadSequence = OverlappingLoads;
 293 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
 294 }
 295 }
 296 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
 297 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);
 298}
299
300unsigned MemCmpExpansion::getNumBlocks() {
301 if (IsUsedForZeroCmp)
302 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
303 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
304 return getNumLoads();
305}
306
// Creates one empty basic block per load/compare block, inserted before
// EndBlock in the enclosing function.
// NOTE(review): the `BasicBlock *BB = BasicBlock::Create(...,` first half of
// the call on line 310 was dropped by the extraction.
 307void MemCmpExpansion::createLoadCmpBlocks() {
 308 for (unsigned i = 0; i < getNumBlocks(); i++) {
 310 EndBlock->getParent(), EndBlock);
 311 LoadCmpBlocks.push_back(BB);
 312 }
 313}
314
// Creates the single result block (stored in ResBlock) before EndBlock.
// NOTE(review): the `ResBlock.BB = BasicBlock::Create(...,` first half of the
// call on line 317 was dropped by the extraction.
 315void MemCmpExpansion::createResultBlock() {
 317 EndBlock->getParent(), EndBlock);
 318}
319
// Emits a pair of loads (lhs/rhs buffer) at OffsetBytes, optionally
// byte-swapped to BSwapSizeType (for little-endian lexicographic compare) and
// zero-extended to CmpSizeType.
// NOTE(review): extraction dropped the lines that initialize
// LhsSource/RhsSource/ByteType and the alignment (LhsAlign/RhsAlign)
// computations, as well as the constant-folding calls after each dyn_cast —
// the dyn_cast template arguments themselves are also missing below.
 320MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
 321 Type *BSwapSizeType,
 322 Type *CmpSizeType,
 323 unsigned OffsetBytes) {
 324
  // Advance both source pointers by the byte offset.
 329 if (OffsetBytes > 0) {
 331 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
 332 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
 335 }
 336
 337
  // Try to constant-fold each side; fall back to an aligned load.
 338 Value *Lhs = nullptr;
 339 if (auto *C = dyn_cast(LhsSource))
 341 if (!Lhs)
 342 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
 343
 344 Value *Rhs = nullptr;
 345 if (auto *C = dyn_cast(RhsSource))
 347 if (!Rhs)
 348 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
 349
 350
  // Widen before swapping if the bswap type is wider than the load type.
 351 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
 352 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);
 353 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);
 354 }
 355
 356
  // Byte-swap both sides so an unsigned compare gives memcmp semantics.
 357 if (BSwapSizeType) {
 359 CI->getModule(), Intrinsic::bswap, BSwapSizeType);
 360 Lhs = Builder.CreateCall(Bswap, Lhs);
 361 Rhs = Builder.CreateCall(Bswap, Rhs);
 362 }
 363
 364
  // Finally widen to the comparison type if requested.
 365 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {
 366 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
 367 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
 368 }
 369 return {Lhs, Rhs};
 370}
371
372
373
374
375
// Emits a single-byte compare block: load one byte from each buffer, subtract,
// feed the difference to PhiRes, and branch to the next block (or EndBlock)
// depending on whether the bytes were equal.
// NOTE(review): extraction dropped the SetInsertPoint, the getLoadPair
// argument lines, the PhiRes->addIncoming call, and the BranchInst creation
// lines — `CmpBr` is used below without its visible definition.
 376void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
 377 unsigned OffsetBytes) {
 378 BasicBlock *BB = LoadCmpBlocks[BlockIndex];
 380 const LoadPair Loads =
 383 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
 384
 386
 387 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
 388
 389
    // Non-final block: branch on Diff == 0 to the next block, else EndBlock.
 391 ConstantInt::get(Diff->getType(), 0));
 394 Builder.Insert(CmpBr);
 395 if (DTU)
 397 {{DominatorTree::Insert, BB, EndBlock},
 398 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
 399 } else {
 400
    // Final block: fall through unconditionally to EndBlock.
 402 Builder.Insert(CmpBr);
 403 if (DTU)
 404 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
 405 }
 406}
407
408
409
410
// Emits up to NumLoadsPerBlockForZeroCmp load pairs for a zero-equality
// compare. Multiple pairs are XORed and OR-reduced pairwise; a single pair is
// compared directly. Returns the i1 "is different" value.
// NOTE(review): extraction dropped the SetInsertPoint branches after the
// LoadCmpBlocks.empty() check, the MaxLoadType computation, the `Value *Cmp`
// initialization, the getLoadPair argument lines, the direct-ICmp `else` body,
// the pairwise CreateOr line, and the final CreateICmpNE — several names
// below (Cmp, MaxLoadType, Or) have no visible definitions.
 411Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
 412 unsigned &LoadIndex) {
 413 assert(LoadIndex < getNumLoads() &&
 414 "getCompareLoadPairs() called with no remaining loads");
 415 std::vector<Value *> XorList, OrList;
 416 Value *Diff = nullptr;
 417
 418 const unsigned NumLoads =
 419 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
 420
 421
 422 if (LoadCmpBlocks.empty())
 424 else
 426
 428
 429
 430
 432 NumLoads == 1 ? nullptr
 434
 435 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
 436 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
 437 const LoadPair Loads = getLoadPair(
 439 MaxLoadType, CurLoadEntry.Offset);
 440
 441 if (NumLoads != 1) {
 442
 443
      // Accumulate per-pair differences for the OR-reduction below.
 444 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
 445 Diff = Builder.CreateZExt(Diff, MaxLoadType);
 446 XorList.push_back(Diff);
 447 } else {
 448
 450 }
 451 }
 452
  // Reduce a list by OR-ing adjacent pairs; odd element carries over.
 453 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
 454 std::vector<Value *> OutList;
 455 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
 457 OutList.push_back(Or);
 458 }
 459 if (InList.size() % 2 != 0)
 460 OutList.push_back(InList.back());
 461 return OutList;
 462 };
 463
 464 if (!Cmp) {
 465
    // OR-reduce all XOR results down to a single value, then compare != 0.
 466 OrList = pairWiseOr(XorList);
 467
 468
 469 while (OrList.size() != 1) {
 470 OrList = pairWiseOr(OrList);
 471 }
 472
 473 assert(Diff && "Failed to find comparison diff");
 475 }
 476
 477 return Cmp;
 478}
479
// Emits one zero-equality block: computes the "is different" value for its
// load group, then branches to ResBlock on difference or to the next block
// (EndBlock for the last one) on equality.
// NOTE(review): extraction dropped the `BasicBlock *BB = ...` and
// `BranchInst *CmpBr = ...` lines and the `Value *Zero = ...` constant used
// for the final block's PhiRes incoming value.
 480void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
 481 unsigned &LoadIndex) {
 482 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
 483
 484 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
 485 ? EndBlock
 486 : LoadCmpBlocks[BlockIndex + 1];
 487
 488
 491 Builder.Insert(CmpBr);
 492 if (DTU)
 493 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
 494 {DominatorTree::Insert, BB, NextBB}});
 495
 496
 497
 498
  // Last block falls through to EndBlock on equality: result is zero.
 499 if (BlockIndex == LoadCmpBlocks.size() - 1) {
 501 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
 502 }
 503}
504
505
506
507
508
509
510
511
512
513
// Emits one ordered-compare block: loads a pair at this block's LoadEntry,
// records them in the result-block PHIs, and branches to the next block on
// equality or to ResBlock on difference. One-byte entries are delegated to
// emitLoadCompareByteBlock.
// NOTE(review): extraction dropped the IntegerType::get lines for
// LoadSizeType/BSwapSizeType/MaxLoadType, the SetInsertPoint, and the
// `BasicBlock *BB` / `BranchInst *CmpBr` / `Value *Zero` definitions used
// below.
 514void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
 515
 516 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
 517
 518 if (CurLoadEntry.LoadSize == 1) {
 519 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
 520 return;
 521 }
 522
 523 Type *LoadSizeType =
 525 Type *BSwapSizeType =
  // Byte swap is only needed on little-endian targets.
 526 DL.isLittleEndian()
 529 : nullptr;
 532 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
 533 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
 534
 536
 537 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
 538 CurLoadEntry.Offset);
 539
 540
 541
  // Feed the loaded values to the result block's PHIs so it can compute
  // the ordered (-1/0/1) result if this block finds a difference.
 542 if (!IsUsedForZeroCmp) {
 543 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
 544 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
 545 }
 546
 547 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
 548 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
 549 ? EndBlock
 550 : LoadCmpBlocks[BlockIndex + 1];
 551
 552
 555 Builder.Insert(CmpBr);
 556 if (DTU)
 557 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
 558 {DominatorTree::Insert, BB, ResBlock.BB}});
 559
 560
 561
 562
  // Last block falls through to EndBlock on equality: result is zero.
 563 if (BlockIndex == LoadCmpBlocks.size() - 1) {
 565 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
 566 }
 567}
568
569
570
571
// Fills in the result block. For zero-equality compares the result is a fixed
// non-zero constant; otherwise an unsigned compare of the PHI'd values selects
// -1 or 1 into PhiRes. Branches to EndBlock either way.
// NOTE(review): extraction dropped the SetInsertPoint calls, the non-zero
// constant & PhiRes->addIncoming for the zero-cmp path, the BranchInst
// creation lines (NewBr), and the CreateSelect producing the -1/1 result.
 572void MemCmpExpansion::emitMemCmpResultBlock() {
 573
 574
 575 if (IsUsedForZeroCmp) {
 581 Builder.Insert(NewBr);
 582 if (DTU)
 583 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
 584 return;
 585 }
 588
  // memcmp semantics: lhs < rhs (unsigned, big-endian-normalized) => -1.
 589 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
 590 ResBlock.PhiSrc2);
 591
 594 ConstantInt::get(Builder.getInt32Ty(), 1));
 595
 598 Builder.Insert(NewBr);
 599 if (DTU)
 600 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
 601}
602
// Creates the two PHIs in the result block that collect the differing loaded
// values from every multi-byte compare block.
// NOTE(review): extraction dropped the MaxLoadType computation and the
// SetInsertPoint into ResBlock.BB that precede the CreatePHI calls.
 603void MemCmpExpansion::setupResultBlockPHINodes() {
 606
 607 ResBlock.PhiSrc1 =
 608 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
 609 ResBlock.PhiSrc2 =
 610 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
 611}
612
// Creates PhiRes in EndBlock to merge the final memcmp result.
// NOTE(review): the entire body (SetInsertPoint + CreatePHI into PhiRes) was
// dropped by the extraction.
 613void MemCmpExpansion::setupEndBlockPHINodes() {
 616}
617
618Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
619 unsigned LoadIndex = 0;
620
621
622 for (unsigned I = 0; I < getNumBlocks(); ++I) {
623 emitLoadCompareBlockMultipleLoads(I, LoadIndex);
624 }
625
626 emitMemCmpResultBlock();
627 return PhiRes;
628}
629
630
631
632
// Single-block zero-equality expansion: compare all load pairs inline in the
// current block.
// NOTE(review): the final `return Builder.CreateZExt(Cmp, ...)` statement was
// dropped by the extraction — `Cmp` is computed but the visible body has no
// return.
 633Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
 634 unsigned LoadIndex = 0;
 635 Value *Cmp = getCompareLoadPairs(0, LoadIndex);
 636 assert(LoadIndex == getNumLoads() && "some entries were not consumed");
 638}
639
640
641
642
643
644
// Single-load ordered memcmp expansion. Sizes 1-2 subtract directly; larger
// sizes byte-swap (on little-endian) and either fold into the user's icmp
// when the only use is a sign comparison, or fall back to a ucmp-style
// intrinsic call on the two loaded values.
// NOTE(review): extraction dropped many lines: the LoadSizeType/BSwapSizeType
// type computations, the getLoadPair argument lines for the small-size path,
// the hasOneUser() guard and the pattern-match conditions that select each
// predicate, the CreateICmp/zext producing `Result`, and the final
// CreateIntrinsic call's first argument line. Names such as LoadSizeType and
// Result appear below without visible definitions.
 645Value *MemCmpExpansion::getMemCmpOneBlock() {
 646 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
 648 Type *BSwapSizeType =
 650 : nullptr;
 651 Type *MaxLoadType =
 654
 655
 656
  // Sizes 1 and 2 fit the 32-bit result: a plain subtract is exact.
 657 if (Size == 1 || Size == 2) {
 658 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
 660 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
 661 }
 662
 663 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
 664 0);
 665
 666
 667
 668
  // If the sole user is a comparison of the memcmp result, try to fold the
  // whole expansion into a single icmp with the matching predicate.
 670 auto *UI = cast(*CI->user_begin());
 671 CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
 672 bool NeedsZExt = false;
 673
 674
 675
 676
 677
 681 Pred = ICmpInst::ICMP_SLT;
 682 NeedsZExt = true;
 685
 686 Pred = ICmpInst::ICMP_SGE;
 689
 690 Pred = ICmpInst::ICMP_SLE;
 691 } else {
 692
 694 }
 695
  // Replace the user's compare directly and drop it; returning nullptr
  // tells the caller no further replacement of CI's result is needed.
 696 if (ICmpInst::isSigned(Pred)) {
 698 Loads.Lhs, Loads.Rhs);
 700 UI->replaceAllUsesWith(Result);
 701 UI->eraseFromParent();
 703 return nullptr;
 704 }
 705 }
 706
 707
 709 {Loads.Lhs, Loads.Rhs});
 710}
711
712
713
// Top-level driver: sets up the CFG (end block, result block, load/compare
// blocks) when more than one block is needed, then dispatches to the
// appropriate single-block or multi-block expansion and returns the result
// value (or PhiRes).
// NOTE(review): extraction dropped the `BasicBlock *StartBlock = ...` line,
// part of the SplitBlock argument list, the terminator-retargeting of
// StartBlock into LoadCmpBlocks[0], and the SetCurrentDebugLocation call.
 714Value *MemCmpExpansion::getMemCmpExpansion() {
 715
 716 if (getNumBlocks() != 1) {
 718 EndBlock = SplitBlock(StartBlock, CI, DTU, nullptr,
 719 nullptr, "endblock");
 720 setupEndBlockPHINodes();
 721 createResultBlock();
 722
 723
 724
 725
 726
    // Ordered compares need the PHIs that feed the -1/0/1 result block.
 727 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
 728
 729
 730 createLoadCmpBlocks();
 731
 732
 733
 735 if (DTU)
 736 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
 737 {DominatorTree::Delete, StartBlock, EndBlock}});
 738 }
 739
 741
 742 if (IsUsedForZeroCmp)
 743 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
 744 : getMemCmpExpansionZeroCase();
 745
 746 if (getNumBlocks() == 1)
 747 return getMemCmpOneBlock();
 748
 749 for (unsigned I = 0; I < getNumBlocks(); ++I) {
 750 emitLoadCompareBlock(I);
 751 }
 752
 753 emitMemCmpResultBlock();
 754 return PhiRes;
 755}
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
// Tail of expandMemCmp(): decides whether a memcmp call is expandable
// (constant non-zero size, target options available, load budget met) and, if
// so, builds and applies the expansion.
// NOTE(review): this fragment's function header and several condition lines
// were dropped by the extraction — e.g. the OptForOptSize early-return, the
// `SizeCast` initializer, the enableMemCmpExpansion() call whose result is
// tested by the visibly-empty `if () return false;`, the opt-size load-limit
// clamping, and the final replacement of CI with the expansion result.
 834 NumMemCmpCalls++;
 835
 836
  // (dropped guard) bail out early in some condition.
 838 return false;
 839
 840
  // Size must be a compile-time constant.
 842 if (!SizeCast) {
 843 NumMemCmpNotConstant++;
 844 return false;
 845 }
 847
  // Zero-length memcmp is handled elsewhere (trivially 0).
 848 if (SizeVal == 0) {
 849 return false;
 850 }
 851
 852
 853 const bool IsUsedForZeroCmp =
 857 IsUsedForZeroCmp);
 858 if () return false;
 859
 862
 863 if (OptForSize &&
 866
 869
 870 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
 871
 872
  // An empty load sequence means the size exceeded the load budget.
 873 if (Expansion.getNumLoads() == 0) {
 874 NumMemCmpGreaterThanMax++;
 875 return false;
 876 }
 877
 878 NumMemCmpInlined++;
 879
 881
 884 }
 885
 886 return true;
 887}
888
889
894
900
// Legacy pass-manager wrapper: gathers the analyses (TLI, TTI, PSI, optional
// BFI and DominatorTree) and forwards to runImpl().
// NOTE(review): extraction dropped the constructor body, the runOnFunction
// signature and skipFunction guard, the template arguments of every
// getAnalysis/getAnalysisIfAvailable call, the TL/TLI/TTI/BFI/DT declaration
// halves, and the private getAnalysisUsage override.
 901class ExpandMemCmpLegacyPass : public FunctionPass {
 902public:
 903 static char ID;
 904
 907 }
 908
 911
 912 auto *TPC = getAnalysisIfAvailable();
 913 if (!TPC) {
 914 return false;
 915 }
 917 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
 918
 920 &getAnalysis().getTLI(F);
 922 &getAnalysis().getTTI(F);
 923 auto *PSI = &getAnalysis().getPSI();
 925 &getAnalysis().getBFI() :
 926 nullptr;
  // DominatorTree is optional; runImpl tolerates a null DT.
 928 if (auto *DTWP = getAnalysisIfAvailable())
 929 DT = &DTWP->getDomTree();
 930 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
 931 return !PA.areAllPreserved();
 932 }
 933
 934private:
 942 }
 943};
944
// Tail of runOnBlock(): scans a basic block for calls to memcmp/bcmp and
// expands the first one found, returning true if the block was changed.
// NOTE(review): the function header, the loop-over-instructions line, the
// dyn_cast template argument, and the getLibFunc condition's first line were
// dropped by the extraction.
 950 CallInst *CI = dyn_cast(&I);
 951 if (!CI) {
 952 continue;
 953 }
  // bcmp is expanded as a zero-equality memcmp (last argument below).
 956 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
 957 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
 958 return true;
 959 }
 960 }
 961 return false;
}
963
// Body of runImpl(): wraps the optional DominatorTree in a lazy DomTreeUpdater
// and repeatedly rescans the function from the first block after each
// expansion (expansion splits blocks, invalidating iteration).
// NOTE(review): the function signature, the DomTreeUpdater template argument
// of std::optional, and the PreservedAnalyses construction lines after the
// MadeChanges checks were dropped by the extraction.
 968 std::optional DTU;
 969 if (DT)
 970 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
 971
 973 bool MadeChanges = false;
 974 for (auto BBIt = F.begin(); BBIt != F.end();) {
 975 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
 976 MadeChanges = true;
 977
 978
    // Restart: the expansion changed the block list under us.
 979 BBIt = F.begin();
 980 } else {
 981 ++BBIt;
 982 }
 983 }
 984 if (MadeChanges)
 987 if (!MadeChanges)
 991 return PA;
 992}
993
994}
995
// Tail of the new-PM ExpandMemCmpPass::run(): fetches analyses from the
// FunctionAnalysisManager and forwards to runImpl().
// NOTE(review): the function header and the analysis-getter lines (TL, TLI,
// TTI, PSI, BFI, DT) preceding these fragments were dropped by the
// extraction.
 1002 .getCachedResult(*F.getParent());
 1005 : nullptr;
 1007
 1008 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);
 1009}
1010
// Legacy pass registration and factory.
// NOTE(review): the INITIALIZE_PASS_BEGIN/DEPENDENCY lines and the
// createExpandMemCmpLegacyPass() signature were dropped by the extraction;
// only the INITIALIZE_PASS_END tail and the factory body remain.
 1011char ExpandMemCmpLegacyPass::ID = 0;
 1013 "Expand memcmp() to load/stores", false, false)
 1021
 1023 return new ExpandMemCmpLegacyPass();
 1024}
AMDGPU Mark last scratch load
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool runImpl(Function &F, const TargetLowering &TLI)
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
Merge contiguous icmps into a memcmp
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
front - Get the first element.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUser() const
Return true if there is exactly one user of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVMContext & getContext() const
All values hold a context through their type.
const ParentTy * getParent() const
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
FunctionPass * createExpandMemCmpLegacyPass()
@ Or
Bitwise or logical OR of integers.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
void initializeExpandMemCmpLegacyPassPass(PassRegistry &)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Returns options for expansion of memcmp. IsZeroCmp is.