LLVM: lib/Analysis/InlineCost.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
33#include "llvm/Config/llvm-config.h"
49#include
50#include
51#include
52
53using namespace llvm;
54
55#define DEBUG_TYPE "inline-cost"
56
57STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
58
61 cl::desc("Default amount of inlining to perform"));
62
63
64
65
66
67
70 cl::desc("Ignore TTI attributes compatibility check between callee/caller "
71 "during inline cost calculation"));
72
75 cl::desc("Prints comments for instruction based on inline cost analysis"));
76
79 cl::desc("Control the amount of inlining to perform (default = 225)"));
80
83 cl::desc("Threshold for inlining functions with inline hint"));
84
88 cl::desc("Threshold for inlining cold callsites"));
89
92 cl::desc("Enable the cost-benefit analysis for the inliner"));
93
94
95
96
99 cl::desc("Multiplier to multiply cycle savings by during inlining"));
100
101
102
103
106 cl::desc("A multiplier on top of cycle savings to decide whether the "
107 "savings won't justify the cost"));
108
111 cl::desc("The maximum size of a callee that get's "
112 "inlined without sufficient cycle savings"));
113
114
115
116
119 cl::desc("Threshold for inlining functions with cold attribute"));
120
123 cl::desc("Threshold for hot callsites "));
124
127 cl::desc("Threshold for locally hot callsites "));
128
131 cl::desc("Maximum block frequency, expressed as a percentage of caller's "
132 "entry frequency, for a callsite to be cold in the absence of "
133 "profile information."));
134
137 cl::desc("Minimum block frequency, expressed as a multiple of caller's "
138 "entry frequency, for a callsite to be hot in the absence of "
139 "profile information."));
140
143 cl::desc("Cost of a single instruction when inlining"));
144
147 cl::desc("Cost of a single inline asm instruction when inlining"));
148
151 cl::desc("Cost of load/store instruction when inlining"));
152
155 cl::desc("Call penalty that is applied per callsite when inlining"));
156
159 cl::init(std::numeric_limits<size_t>::max()),
160 cl::desc("Do not inline functions with a stack size "
161 "that exceeds the specified limit"));
162
164 "recursive-inline-max-stacksize", cl::Hidden,
166 cl::desc("Do not inline recursive functions with a stack "
167 "size that exceeds the specified limit"));
168
171 cl::desc("Compute the full inline cost of a call site even when the cost "
172 "exceeds the threshold."));
173
176 cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
177 "attributes."));
178
181 cl::desc("Disables evaluation of GetElementPtr with constant operands"));
182
185 cl::desc("Inline all viable calls, even if they exceed the inlining "
186 "threshold"));
187namespace llvm {
190 int AttrValue = 0;
192 return AttrValue;
193 }
194 return std::nullopt;
195}
196
200
204
207
208}
209
210}
211
212namespace {
213class InlineCostCallAnalyzer;
214
215
216
/// Snapshot of the inliner's running cost and threshold taken immediately
/// before and after a single instruction is analyzed; used to annotate
/// per-instruction cost output for debugging.
struct InstructionCostDetail {
  int CostBefore = 0;      // Accumulated cost before visiting the instruction.
  int CostAfter = 0;       // Accumulated cost after visiting it.
  int ThresholdBefore = 0; // Inline threshold before visiting the instruction.
  int ThresholdAfter = 0;  // Inline threshold after visiting it.

  // Cost contributed by this one instruction.
  int getCostDelta() const { return CostAfter - CostBefore; }

  // Net threshold adjustment made while visiting this instruction.
  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  // True when visiting the instruction changed the threshold at all.
  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};
229
231private:
232 InlineCostCallAnalyzer *const ICCA;
233
234public:
235 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
236 void emitInstructionAnnot(const Instruction *I,
237 formatted_raw_ostream &OS) override;
238};
239
240
241
242
243
244
245
246
247
248class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
249 typedef InstVisitor<CallAnalyzer, bool> Base;
250 friend class InstVisitor<CallAnalyzer, bool>;
251
252protected:
253 virtual ~CallAnalyzer() = default;
254
255 const TargetTransformInfo &TTI;
256
257
258 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
259
260
261 function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
262
263
264 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
265
266
267 ProfileSummaryInfo *PSI;
268
269
271
272
273 const DataLayout &DL;
274
275
276 OptimizationRemarkEmitter *ORE;
277
278
279
280
281 CallBase &CandidateCall;
282
283
284 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr;
285
286
287
288 virtual void onBlockStart(const BasicBlock *BB) {}
289
290
291 virtual void onBlockAnalyzed(const BasicBlock *BB) {}
292
293
294 virtual void onInstructionAnalysisStart(const Instruction *I) {}
295
296
297 virtual void onInstructionAnalysisFinish(const Instruction *I) {}
298
299
300
301
303
304
305
306
307 virtual bool shouldStop() { return false; }
308
309
310
311
312
314
315
316 virtual void onDisableSROA(AllocaInst *Arg) {}
317
318
319 virtual void onDisableLoadElimination() {}
320
321
322
323 virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }
324
325
326 virtual void onCallPenalty() {}
327
328
329 virtual void onMemAccess(){};
330
331
332
333 virtual void onLoadEliminationOpportunity() {}
334
335
336
337 virtual void onCallArgumentSetup(const CallBase &Call) {}
338
339
340 virtual void onLoadRelativeIntrinsic() {}
341
342
343 virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
344 }
345
346
347
348 virtual bool onJumpTable(unsigned JumpTableSize) { return true; }
349
350
351
352 virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }
353
354
355
356 virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
357 bool DefaultDestUnreachable) {}
358
359
360
361 virtual void onMissedSimplification() {}
362
363
364 virtual void onInlineAsm(const InlineAsm &Arg) {}
365
366
367 virtual void onInitializeSROAArg(AllocaInst *Arg) {}
368
369
370 virtual void onAggregateSROAUse(AllocaInst *V) {}
371
372 bool handleSROA(Value *V, bool DoNotDisable) {
373
374 if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
375 if (DoNotDisable) {
376 onAggregateSROAUse(SROAArg);
377 return true;
378 }
379 disableSROAForArg(SROAArg);
380 }
381 return false;
382 }
383
384 bool IsCallerRecursive = false;
385 bool IsRecursiveCall = false;
386 bool ExposesReturnsTwice = false;
387 bool HasDynamicAlloca = false;
388 bool ContainsNoDuplicateCall = false;
389 bool HasReturn = false;
390 bool HasIndirectBr = false;
391 bool HasUninlineableIntrinsic = false;
392 bool InitsVargArgs = false;
393
394
395 uint64_t AllocatedSize = 0;
396 unsigned NumInstructions = 0;
397 unsigned NumInlineAsmInstructions = 0;
398 unsigned NumVectorInstructions = 0;
399
400
401
402
403
404
405
406
407
408 DenseMap<Value *, Value *> SimplifiedValues;
409
410
411
412 DenseMap<Value *, AllocaInst *> SROAArgValues;
413
414
415 DenseSet<AllocaInst *> EnabledSROAAllocas;
416
417
418 DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
419
420
421 SmallPtrSet<BasicBlock *, 16> DeadBlocks;
422
423
424
425 DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
426
427
428
429
430 bool EnableLoadElimination = true;
431
432
433 bool AllowRecursiveCall = false;
434
435 SmallPtrSet<Value *, 16> LoadAddrSet;
436
437 AllocaInst *getSROAArgForValueOrNull(Value *V) const {
438 auto It = SROAArgValues.find(V);
439 if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
440 return nullptr;
441 return It->second;
442 }
443
444
445
446 template T *getDirectOrSimplifiedValue(Value *V) const {
449 return getSimplifiedValue(V);
450 }
451
452
453 bool isAllocaDerivedArg(Value *V);
454 void disableSROAForArg(AllocaInst *SROAArg);
455 void disableSROA(Value *V);
456 void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
457 void disableLoadElimination();
458 bool isGEPFree(GetElementPtrInst &GEP);
459 bool canFoldInboundsGEP(GetElementPtrInst &I);
460 bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
461 bool simplifyCallSite(Function *F, CallBase &Call);
462 bool simplifyCmpInstForRecCall(CmpInst &Cmp);
464 bool simplifyIntrinsicCallIsConstant(CallBase &CB);
465 bool simplifyIntrinsicCallObjectSize(CallBase &CB);
466 ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
467 bool isLoweredToCall(Function *F, CallBase &Call);
468
469
470
471
472
473
474 bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
475
476
477
478 bool isKnownNonNullInCallee(Value *V);
479
480
481 bool allowSizeGrowth(CallBase &Call);
482
483
484 InlineResult analyzeBlock(BasicBlock *BB,
485 const SmallPtrSetImpl<const Value *> &EphValues);
486
487
488
491 void visit(Function *);
492 void visit(Function &);
493 void visit(BasicBlock *);
494 void visit(BasicBlock &);
495
496
497 bool visitInstruction(Instruction &I);
498
499
500 bool visitAlloca(AllocaInst &I);
501 bool visitPHI(PHINode &I);
502 bool visitGetElementPtr(GetElementPtrInst &I);
503 bool visitBitCast(BitCastInst &I);
504 bool visitPtrToInt(PtrToIntInst &I);
505 bool visitIntToPtr(IntToPtrInst &I);
506 bool visitCastInst(CastInst &I);
507 bool visitCmpInst(CmpInst &I);
508 bool visitSub(BinaryOperator &I);
509 bool visitBinaryOperator(BinaryOperator &I);
510 bool visitFNeg(UnaryOperator &I);
511 bool visitLoad(LoadInst &I);
512 bool visitStore(StoreInst &I);
513 bool visitExtractValue(ExtractValueInst &I);
514 bool visitInsertValue(InsertValueInst &I);
515 bool visitCallBase(CallBase &Call);
516 bool visitReturnInst(ReturnInst &RI);
517 bool visitBranchInst(BranchInst &BI);
518 bool visitSelectInst(SelectInst &SI);
519 bool visitSwitchInst(SwitchInst &SI);
520 bool visitIndirectBrInst(IndirectBrInst &IBI);
521 bool visitResumeInst(ResumeInst &RI);
522 bool visitCleanupReturnInst(CleanupReturnInst &RI);
523 bool visitCatchReturnInst(CatchReturnInst &RI);
524 bool visitUnreachableInst(UnreachableInst &I);
525
526public:
527 CallAnalyzer(
528 Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
529 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
530 function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
531 function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
532 ProfileSummaryInfo *PSI = nullptr,
533 OptimizationRemarkEmitter *ORE = nullptr,
534 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
535 nullptr)
536 : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
537 GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
538 CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {}
539
540 InlineResult analyze();
541
542
543 Value *getSimplifiedValueUnchecked(Value *V) const {
544 return SimplifiedValues.lookup(V);
545 }
546
547
548
549 template T *getSimplifiedValue(Value *V) const {
550 Value *SimpleV = SimplifiedValues.lookup(V);
551 if (!SimpleV)
552 return nullptr;
553
554
555
556 if constexpr (std::is_base_of_v<Constant, T>)
558
559
561 if (I->getFunction() != &F)
562 return nullptr;
564 if (Arg->getParent() != &F)
565 return nullptr;
567 return nullptr;
569 }
570
571
572
573 unsigned NumConstantArgs = 0;
574 unsigned NumConstantOffsetPtrArgs = 0;
575 unsigned NumAllocaArgs = 0;
576 unsigned NumConstantPtrCmps = 0;
577 unsigned NumConstantPtrDiffs = 0;
578 unsigned NumInstructionsSimplified = 0;
579
581};
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
// Expected number of compares needed to dispatch among NumCaseCluster case
// clusters via a balanced binary search: 3/2 * n - 1.
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  const int64_t Clusters = NumCaseCluster;
  return (3 * Clusters) / 2 - 1;
}
601
602
603
604class InlineCostCallAnalyzer final : public CallAnalyzer {
605 const bool ComputeFullInlineCost;
606 int LoadEliminationCost = 0;
607
608
609 int VectorBonus = 0;
610
611 int SingleBBBonus = 0;
612
613
614 const InlineParams &Params;
615
616
617
618
619 DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
620
621
622
623 int Threshold = 0;
624
625
626 int StaticBonusApplied = 0;
627
628
629 const bool BoostIndirectCalls;
630
631
632 const bool IgnoreThreshold;
633
634
635 const bool CostBenefitAnalysisEnabled;
636
637
638
639
640
641 int Cost = 0;
642
643
644
645
646 int CostAtBBStart = 0;
647
648
649
650 int ColdSize = 0;
651
652
653 bool DecidedByCostThreshold = false;
654
655
656 bool DecidedByCostBenefit = false;
657
658
659 std::optional CostBenefit;
660
661 bool SingleBB = true;
662
663 unsigned SROACostSavings = 0;
664 unsigned SROACostSavingsLost = 0;
665
666
667
668
669 DenseMap<AllocaInst *, int> SROAArgCosts;
670
671
673
674
675
676
677
678 void updateThreshold(CallBase &Call, Function &Callee);
679
680 std::optional getHotCallSiteThreshold(CallBase &Call,
681 BlockFrequencyInfo *CallerBFI);
682
683
684 void addCost(int64_t Inc) {
685 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
686 Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
687 }
688
689 void onDisableSROA(AllocaInst *Arg) override {
690 auto CostIt = SROAArgCosts.find(Arg);
691 if (CostIt == SROAArgCosts.end())
692 return;
693 addCost(CostIt->second);
694 SROACostSavings -= CostIt->second;
695 SROACostSavingsLost += CostIt->second;
696 SROAArgCosts.erase(CostIt);
697 }
698
699 void onDisableLoadElimination() override {
700 addCost(LoadEliminationCost);
701 LoadEliminationCost = 0;
702 }
703
704 bool onCallBaseVisitStart(CallBase &Call) override {
705 if (std::optional AttrCallThresholdBonus =
707 Threshold += *AttrCallThresholdBonus;
708
709 if (std::optional AttrCallCost =
711 addCost(*AttrCallCost);
712
713
714 return false;
715 }
716 return true;
717 }
718
719 void onCallPenalty() override { addCost(CallPenalty); }
720
721 void onMemAccess() override { addCost(MemAccessCost); }
722
723 void onCallArgumentSetup(const CallBase &Call) override {
724
725
727 }
728 void onLoadRelativeIntrinsic() override {
729
731 }
732 void onLoweredCall(Function *F, CallBase &Call,
733 bool IsIndirectCall) override {
734
736
737
738
739
740
741
742 if (IsIndirectCall && BoostIndirectCalls) {
743 auto IndirectCallParams = Params;
744 IndirectCallParams.DefaultThreshold =
746
747
748 InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
749 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
750 false);
751 if (CA.analyze().isSuccess()) {
752
753
754 addCost(-std::max(0, CA.getThreshold() - CA.getCost()));
755 }
756 } else
757
760 }
761
762 void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
763 bool DefaultDestUnreachable) override {
764
765
766
767 if (JumpTableSize) {
768
769
770 if (!DefaultDestUnreachable)
772
773 int64_t JTCost =
775 addCost(JTCost);
776 return;
777 }
778
779 if (NumCaseCluster <= 3) {
780
781
782
783 addCost((NumCaseCluster - DefaultDestUnreachable) * 2 * InstrCost);
784 return;
785 }
786
787 int64_t ExpectedNumberOfCompare =
788 getExpectedNumberOfCompare(NumCaseCluster);
789 int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;
790
791 addCost(SwitchCost);
792 }
793
794
795
796
797 void onInlineAsm(const InlineAsm &Arg) override {
799 return;
802 int SectionLevel = 0;
803 int InlineAsmInstrCount = 0;
804 for (StringRef AsmStr : AsmStrs) {
805
806 StringRef Trimmed = AsmStr.trim();
807 size_t hashPos = Trimmed.find('#');
809 Trimmed = Trimmed.substr(0, hashPos);
810
811 if (Trimmed.empty())
812 continue;
813
814
815
816
817 if (Trimmed.starts_with(".pushsection")) {
818 ++SectionLevel;
819 continue;
820 }
821 if (Trimmed.starts_with(".popsection")) {
822 --SectionLevel;
823 continue;
824 }
825
827 continue;
828 if (SectionLevel == 0)
829 ++InlineAsmInstrCount;
830 }
831 NumInlineAsmInstructions += InlineAsmInstrCount;
833 }
834
835 void onMissedSimplification() override { addCost(InstrCost); }
836
837 void onInitializeSROAArg(AllocaInst *Arg) override {
838 assert(Arg != nullptr &&
839 "Should not initialize SROA costs for null value.");
841 SROACostSavings += SROAArgCost;
842 SROAArgCosts[Arg] = SROAArgCost;
843 }
844
845 void onAggregateSROAUse(AllocaInst *SROAArg) override {
846 auto CostIt = SROAArgCosts.find(SROAArg);
847 assert(CostIt != SROAArgCosts.end() &&
848 "expected this argument to have a cost");
851 }
852
853 void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
854
855 void onBlockAnalyzed(const BasicBlock *BB) override {
856 if (CostBenefitAnalysisEnabled) {
857
858
859 assert(GetBFI && "GetBFI must be available");
860 BlockFrequencyInfo *BFI = &(GetBFI(F));
861 assert(BFI && "BFI must be available");
864 ColdSize += Cost - CostAtBBStart;
865 }
866
868
869
870
871
872 if (SingleBB && TI->getNumSuccessors() > 1) {
873
874 Threshold -= SingleBBBonus;
875 SingleBB = false;
876 }
877 }
878
879 void onInstructionAnalysisStart(const Instruction *I) override {
880
881
883 return;
884 auto &CostDetail = InstructionCostDetailMap[I];
885 CostDetail.CostBefore = Cost;
886 CostDetail.ThresholdBefore = Threshold;
887 }
888
889 void onInstructionAnalysisFinish(const Instruction *I) override {
890
891
893 return;
894 auto &CostDetail = InstructionCostDetailMap[I];
895 CostDetail.CostAfter = Cost;
896 CostDetail.ThresholdAfter = Threshold;
897 }
898
899 bool isCostBenefitAnalysisEnabled() {
900 if (!PSI || !PSI->hasProfileSummary())
901 return false;
902
903 if (!GetBFI)
904 return false;
905
907
909 return false;
910 } else {
911
912 if (!PSI->hasInstrumentationProfile())
913 return false;
914 }
915
917 if (->getEntryCount())
918 return false;
919
920 BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
921 if (!CallerBFI)
922 return false;
923
924
925 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
926 return false;
927
928
929 auto EntryCount = F.getEntryCount();
930 if (!EntryCount || !EntryCount->getCount())
931 return false;
932
933 BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
934 if (!CalleeBFI)
935 return false;
936
937 return true;
938 }
939
940
941 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
945 }
946
947
948 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
952 }
953
954 void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) {
956 CandidateCall, "inline-cycle-savings-for-test")) {
957 CycleSavings = *AttrCycleSavings;
958 }
959
961 CandidateCall, "inline-runtime-cost-for-test")) {
962 Size = *AttrRuntimeCost;
963 }
964 }
965
966
967
968
969 std::optional costBenefitAnalysis() {
970 if (!CostBenefitAnalysisEnabled)
971 return std::nullopt;
972
973
974
975
976
977 if (Threshold == 0)
978 return std::nullopt;
979
981 BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
983
984
985
986
987
988
989
990
991
992
993
994 APInt CycleSavings(128, 0);
995
996 for (auto &BB : F) {
997 APInt CurrentSavings(128, 0);
998 for (auto &I : BB) {
1000
1001 if (BI->isConditional() &&
1002 getSimplifiedValue(BI->getCondition())) {
1004 }
1006 if (getSimplifiedValue(SI->getCondition()))
1009
1010 if (SimplifiedValues.count(V)) {
1012 }
1013 }
1014 }
1015
1018 CycleSavings += CurrentSavings;
1019 }
1020
1021
1022 auto EntryProfileCount = F.getEntryCount();
1023 assert(EntryProfileCount && EntryProfileCount->getCount());
1024 auto EntryCount = EntryProfileCount->getCount();
1025 CycleSavings += EntryCount / 2;
1026 CycleSavings = CycleSavings.udiv(EntryCount);
1027
1028
1029 auto *CallerBB = CandidateCall.getParent();
1030 BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
1033
1034
1035
1036
1037 int Size = Cost - ColdSize;
1038
1039
1040
1042
1043 OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size);
1044 CostBenefit.emplace(APInt(128, Size), CycleSavings);
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
1068 Threshold *= Size;
1069
1070 APInt UpperBoundCycleSavings = CycleSavings;
1071 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
1072 if (UpperBoundCycleSavings.uge(Threshold))
1073 return true;
1074
1075 APInt LowerBoundCycleSavings = CycleSavings;
1076 LowerBoundCycleSavings *=
1077 getInliningCostBenefitAnalysisProfitableMultiplier();
1078 if (LowerBoundCycleSavings.ult(Threshold))
1079 return false;
1080
1081
1082 return std::nullopt;
1083 }
1084
1085 InlineResult finalizeAnalysis() override {
1086
1087
1088
1089
1090
1092 if (Caller->hasMinSize()) {
1093 DominatorTree DT(F);
1094 LoopInfo LI(DT);
1095 int NumLoops = 0;
1096 for (Loop *L : LI) {
1097
1098 if (DeadBlocks.count(L->getHeader()))
1099 continue;
1100 NumLoops++;
1101 }
1103 }
1104
1105
1106
1107
1108 if (NumVectorInstructions <= NumInstructions / 10)
1109 Threshold -= VectorBonus;
1110 else if (NumVectorInstructions <= NumInstructions / 2)
1111 Threshold -= VectorBonus / 2;
1112
1113 if (std::optional AttrCost =
1115 Cost = *AttrCost;
1116
1118 CandidateCall,
1120 Cost *= *AttrCostMult;
1121
1122 if (std::optional AttrThreshold =
1124 Threshold = *AttrThreshold;
1125
1126 if (auto Result = costBenefitAnalysis()) {
1127 DecidedByCostBenefit = true;
1128 if (*Result)
1130 else
1132 }
1133
1134 if (IgnoreThreshold)
1136
1137 DecidedByCostThreshold = true;
1138 return Cost < std::max(1, Threshold)
1140 : InlineResult::failure("Cost over threshold.");
1141 }
1142
1143 bool shouldStop() override {
1144 if (IgnoreThreshold || ComputeFullInlineCost)
1145 return false;
1146
1147
1148 if (Cost < Threshold)
1149 return false;
1150 DecidedByCostThreshold = true;
1151 return true;
1152 }
1153
1154 void onLoadEliminationOpportunity() override {
1155 LoadEliminationCost += InstrCost;
1156 }
1157
1158 InlineResult onAnalysisStart() override {
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169 assert(NumInstructions == 0);
1170 assert(NumVectorInstructions == 0);
1171
1172
1173 updateThreshold(CandidateCall, F);
1174
1175
1176
1177
1178 assert(Threshold >= 0);
1179 assert(SingleBBBonus >= 0);
1180 assert(VectorBonus >= 0);
1181
1182
1183
1184
1185 Threshold += (SingleBBBonus + VectorBonus);
1186
1187
1188
1190
1191
1192
1193 if (F.getCallingConv() == CallingConv::Cold)
1195
1196 LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n");
1197
1198
1199 if (Cost >= Threshold && !ComputeFullInlineCost)
1201
1203 }
1204
1205public:
1206 InlineCostCallAnalyzer(
1207 Function &Callee, CallBase &Call, const InlineParams &Params,
1208 const TargetTransformInfo &TTI,
1209 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
1210 function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
1211 function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
1212 ProfileSummaryInfo *PSI = nullptr,
1213 OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
1214 bool IgnoreThreshold = false,
1215 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
1216 nullptr)
1217 : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1218 ORE, GetEphValuesCache),
1220 Params.ComputeFullInlineCost || ORE ||
1221 isCostBenefitAnalysisEnabled()),
1223 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1224 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1225 Writer(this) {
1226 AllowRecursiveCall = *Params.AllowRecursiveCall;
1227 }
1228
1229
1230 InlineCostAnnotationWriter Writer;
1231
1232 void dump();
1233
1234
1235
1236 void print(raw_ostream &OS);
1237
1238 std::optional getCostDetails(const Instruction *I) {
1239 auto It = InstructionCostDetailMap.find(I);
1240 if (It != InstructionCostDetailMap.end())
1241 return It->second;
1242 return std::nullopt;
1243 }
1244
1245 ~InlineCostCallAnalyzer() override = default;
1246 int getThreshold() const { return Threshold; }
1247 int getCost() const { return Cost; }
1248 int getStaticBonusApplied() const { return StaticBonusApplied; }
1249 std::optional getCostBenefitPair() { return CostBenefit; }
1250 bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
1251 bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
1252};
1253
1254
1255static bool isSoleCallToLocalFunction(const CallBase &CB,
1257 return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
1259}
1260
1261class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
1262private:
1264
1265
1266
1267
1268 static constexpr int JTCostMultiplier = 2;
1269 static constexpr int CaseClusterCostMultiplier = 2;
1270 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1271 static constexpr int SwitchCostMultiplier = 2;
1272
1273
1274
1275 unsigned SROACostSavingOpportunities = 0;
1276 int VectorBonus = 0;
1277 int SingleBBBonus = 0;
1278 int Threshold = 5;
1279
1280 DenseMap<AllocaInst *, unsigned> SROACosts;
1281
1283 Cost[static_cast<size_t>(Feature)] += Delta;
1284 }
1285
1287 Cost[static_cast<size_t>(Feature)] = Value;
1288 }
1289
1290 void onDisableSROA(AllocaInst *Arg) override {
1291 auto CostIt = SROACosts.find(Arg);
1292 if (CostIt == SROACosts.end())
1293 return;
1294
1295 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1296 SROACostSavingOpportunities -= CostIt->second;
1297 SROACosts.erase(CostIt);
1298 }
1299
1300 void onDisableLoadElimination() override {
1301 set(InlineCostFeatureIndex::load_elimination, 1);
1302 }
1303
1304 void onCallPenalty() override {
1305 increment(InlineCostFeatureIndex::call_penalty, CallPenalty);
1306 }
1307
1308 void onCallArgumentSetup(const CallBase &Call) override {
1309 increment(InlineCostFeatureIndex::call_argument_setup,
1311 }
1312
1313 void onLoadRelativeIntrinsic() override {
1314 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost);
1315 }
1316
1317 void onLoweredCall(Function *F, CallBase &Call,
1318 bool IsIndirectCall) override {
1319 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1321
1322 if (IsIndirectCall) {
1323 InlineParams IndirectCallParams = { 0,
1324 {},
1325 {},
1326 {},
1327 {},
1328 {},
1329 {},
1330 {},
1331 true,
1332 true};
1335
1336 InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
1337 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1338 false, true);
1339 if (CA.analyze().isSuccess()) {
1340 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1341 CA.getCost());
1342 increment(InlineCostFeatureIndex::nested_inlines, 1);
1343 }
1344 } else {
1345 onCallPenalty();
1346 }
1347 }
1348
1349 void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
1350 bool DefaultDestUnreachable) override {
1351 if (JumpTableSize) {
1352 if (!DefaultDestUnreachable)
1353 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1354 SwitchDefaultDestCostMultiplier * InstrCost);
1355 int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
1357 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1358 return;
1359 }
1360
1361 if (NumCaseCluster <= 3) {
1362 increment(InlineCostFeatureIndex::case_cluster_penalty,
1363 (NumCaseCluster - DefaultDestUnreachable) *
1364 CaseClusterCostMultiplier * InstrCost);
1365 return;
1366 }
1367
1368 int64_t ExpectedNumberOfCompare =
1369 getExpectedNumberOfCompare(NumCaseCluster);
1370
1371 int64_t SwitchCost =
1372 ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
1373 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1374 }
1375
1376 void onMissedSimplification() override {
1377 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1379 }
1380
1381 void onInitializeSROAArg(AllocaInst *Arg) override {
1383 SROACosts[Arg] = SROAArgCost;
1384 SROACostSavingOpportunities += SROAArgCost;
1385 }
1386
1387 void onAggregateSROAUse(AllocaInst *Arg) override {
1388 SROACosts.find(Arg)->second += InstrCost;
1389 SROACostSavingOpportunities += InstrCost;
1390 }
1391
1392 void onBlockAnalyzed(const BasicBlock *BB) override {
1394 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1395 Threshold -= SingleBBBonus;
1396 }
1397
1398 InlineResult finalizeAnalysis() override {
1400 if (Caller->hasMinSize()) {
1401 DominatorTree DT(F);
1402 LoopInfo LI(DT);
1403 for (Loop *L : LI) {
1404
1405 if (DeadBlocks.count(L->getHeader()))
1406 continue;
1407 increment(InlineCostFeatureIndex::num_loops,
1409 }
1410 }
1411 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.size());
1412 set(InlineCostFeatureIndex::simplified_instructions,
1413 NumInstructionsSimplified);
1414 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1415 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1416 NumConstantOffsetPtrArgs);
1417 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1418
1419 if (NumVectorInstructions <= NumInstructions / 10)
1420 Threshold -= VectorBonus;
1421 else if (NumVectorInstructions <= NumInstructions / 2)
1422 Threshold -= VectorBonus / 2;
1423
1424 set(InlineCostFeatureIndex::threshold, Threshold);
1425
1427 }
1428
1429 bool shouldStop() override { return false; }
1430
1431 void onLoadEliminationOpportunity() override {
1432 increment(InlineCostFeatureIndex::load_elimination, 1);
1433 }
1434
1435 InlineResult onAnalysisStart() override {
1436 increment(InlineCostFeatureIndex::callsite_cost,
1438
1439 set(InlineCostFeatureIndex::cold_cc_penalty,
1440 (F.getCallingConv() == CallingConv::Cold));
1441
1442 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1443 isSoleCallToLocalFunction(CandidateCall, F));
1444
1445
1446
1447
1448 int SingleBBBonusPercent = 50;
1452 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1453 VectorBonus = Threshold * VectorBonusPercent / 100;
1454 Threshold += (SingleBBBonus + VectorBonus);
1455
1457 }
1458
1459public:
1460 InlineCostFeaturesAnalyzer(
1461 const TargetTransformInfo &TTI,
1462 function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
1463 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1464 function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
1465 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
1466 CallBase &Call)
1467 : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI,
1468 PSI) {}
1469
1471};
1472
1473}
1474
1475
1476bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
1477 return SROAArgValues.count(V);
1478}
1479
1480void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
1481 onDisableSROA(SROAArg);
1482 EnabledSROAAllocas.erase(SROAArg);
1483 disableLoadElimination();
1484}
1485
1486void InlineCostAnnotationWriter::emitInstructionAnnot(
1487 const Instruction *I, formatted_raw_ostream &OS) {
1488
1489
1490
1491 std::optional Record = ICCA->getCostDetails(I);
1492 if (!Record)
1493 OS << "; No analysis for the instruction";
1494 else {
1495 OS << "; cost before = " << Record->CostBefore
1496 << ", cost after = " << Record->CostAfter
1497 << ", threshold before = " << Record->ThresholdBefore
1498 << ", threshold after = " << Record->ThresholdAfter << ", ";
1499 OS << "cost delta = " << Record->getCostDelta();
1500 if (Record->hasThresholdChanged())
1501 OS << ", threshold delta = " << Record->getThresholdDelta();
1502 }
1503 auto *V = ICCA->getSimplifiedValueUnchecked(const_cast<Instruction *>(I));
1504 if (V) {
1505 OS << ", simplified to ";
1506 V->print(OS, true);
1508 if (VI->getFunction() != I->getFunction())
1509 OS << " (caller instruction)";
1511 if (VArg->getParent() != I->getFunction())
1512 OS << " (caller argument)";
1513 }
1514 }
1515 OS << "\n";
1516}
1517
1518
1519void CallAnalyzer::disableSROA(Value *V) {
1520 if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
1521 disableSROAForArg(SROAArg);
1522 }
1523}
1524
1525void CallAnalyzer::disableLoadElimination() {
1526 if (EnableLoadElimination) {
1527 onDisableLoadElimination();
1528 EnableLoadElimination = false;
1529 }
1530}
1531
1532
1533
1534
1535
1536bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
1537 unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
1538 assert(IntPtrWidth == Offset.getBitWidth());
1539
1541 GTI != GTE; ++GTI) {
1542 ConstantInt *OpC =
1543 getDirectOrSimplifiedValue(GTI.getOperand());
1544 if (!OpC)
1545 return false;
1547 continue;
1548
1549
1550 if (StructType *STy = GTI.getStructTypeOrNull()) {
1552 const StructLayout *SL = DL.getStructLayout(STy);
1554 continue;
1555 }
1556
1557 APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
1559 }
1560 return true;
1561}
1562
1563
1564
1565
// Decide whether a GEP whose offset could not be folded should still be
// considered free, based on whether its indices simplify to constants.
// NOTE(review): lines 1567-1568 and 1571-1576 (operand collection and the
// TTI::getGEPCost query deciding the return value) are missing from this
// copy; verify against upstream.
1566bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
1569 for (const Use &Op : GEP.indices())
1570 if (Constant *SimpleOp = getSimplifiedValue(Op))
1572 else
1577}
1578
// Model an alloca: track total static stack growth and flag dynamic allocas,
// which block inlining heuristics elsewhere in the analysis.
1579bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// An alloca's operand cannot itself be SROA'd through this instruction.
1580 disableSROA(I.getOperand(0));
1581
1582
1583
// Array allocations with a (simplified-to-)constant size contribute their
// full size to AllocatedSize; otherwise they are dynamic.
1584 if (I.isArrayAllocation()) {
1585 Constant *Size = getSimplifiedValue(I.getArraySize());
// NOTE(review): line 1586 (the dyn_cast of Size to ConstantInt defining
// AllocSize) and line 1596 (the start of the overflow-checked multiply,
// presumably MulOverflow) are missing from this copy.
1587
1588
1589
1590
1591
1592
1593
1594
1595 Type *Ty = I.getAllocatedType();
1597 AllocSize->getLimitedValue(),
1598 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
// NOTE(review): line 1599 (the condition guarding the dynamic-alloca
// fallback, likely the overflow/size-threshold check) is missing here.
1600 HasDynamicAlloca = true;
1601 return false;
1602 }
1603 }
1604
// Plain static allocas accumulate their type's allocation size, saturating
// rather than overflowing.
1605
1606 if (I.isStaticAlloca()) {
1607 Type *Ty = I.getAllocatedType();
1608 AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinValue(),
1609 AllocatedSize);
1610 }
1611
1612
1613
1614
1615
// NOTE(review): the negation in this condition was lost in this copy
// (upstream reads !I.isStaticAlloca()).
1616 if (.isStaticAlloca())
1617 HasDynamicAlloca = true;
1618
1619 return false;
1620}
1621
// Try to simplify a PHI: if all live incoming values agree on a constant, or
// on a (base, offset) pair for SROA/constant-offset tracking, propagate that
// information to the PHI itself. Always returns true (PHIs are modeled as
// free).
1622bool CallAnalyzer::visitPHI(PHINode &I) {
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633 APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
// Only pointer-typed PHIs participate in SROA/offset tracking.
1634 bool CheckSROA = I.getType()->isPointerTy();
1635
1636
// NOTE(review): line 1637 (the declaration of FirstC, the first constant
// incoming value) is missing from this copy; FirstC is used below.
1638 std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
1639 Value *FirstV = nullptr;
1640
1641 for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
1642 BasicBlock *Pred = I.getIncomingBlock(i);
// Edges from dead predecessors, or from predecessors known to branch
// elsewhere, cannot contribute a value.
1643
1644 if (DeadBlocks.count(Pred))
1645 continue;
1646
1647
1648 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1649 if (KnownSuccessor && KnownSuccessor != I.getParent())
1650 continue;
1651
1652 Value *V = I.getIncomingValue(i);
// Self-references do not constrain the PHI's value.
1653
1654 if (&I == V)
1655 continue;
1656
1657 Constant *C = getDirectOrSimplifiedValue(V);
1658
1659 std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
// NOTE(review): the negated operand (!C) was lost from the next two
// conditions in this copy; upstream checks "!C && CheckSROA" and then
// "!C && !BaseAndOffset.first".
1660 if ( && CheckSROA)
1661 BaseAndOffset = ConstantOffsetPtrs.lookup(V);
1662
1663 if ( && !BaseAndOffset.first)
1664
1665
1666 return true;
1667
// Once a constant candidate exists, every further live value must match it.
1668 if (FirstC) {
1669 if (FirstC == C)
1670
1671
1672 continue;
1673
1674
1675 return true;
1676 }
1677
// Likewise for a (base, offset) candidate.
1678 if (FirstV) {
1679
1680 if (FirstBaseAndOffset == BaseAndOffset)
1681 continue;
1682 return true;
1683 }
1684
1685 if (C) {
1686
1687 FirstC = C;
1688 continue;
1689 }
1690
1691
1692
1693 FirstV = V;
1694 FirstBaseAndOffset = BaseAndOffset;
1695 }
1696
// All live incoming values agreed on a constant: the PHI folds to it.
1697
1698 if (FirstC) {
1699 SimplifiedValues[&I] = FirstC;
1700 return true;
1701 }
1702
// All live incoming values agreed on a base/offset: propagate it, along
// with any SROA candidacy of the base.
1703
1704 if (FirstBaseAndOffset.first) {
1705 ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
1706
1707 if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1708 SROAArgValues[&I] = SROAArg;
1709 }
1710
1711 return true;
1712}
1713
1714
1715
1716
1717
// Attempt to fold an inbounds GEP into the constant-offset tracking map: if
// the pointer operand already has a known (base, offset) and every index is
// constant, record the GEP's resulting (base, offset) and report success.
1718bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
1719
1720 std::pair<Value *, APInt> BaseAndOffset =
1721 ConstantOffsetPtrs.lookup(I.getPointerOperand());
1722 if (!BaseAndOffset.first)
1723 return false;
1724
1725
1726
// NOTE(review): the template argument of this cast was lost in this copy;
// upstream casts I to GEPOperator.
1727 if (!accumulateGEPOffset(cast(I), BaseAndOffset.second))
1728 return false;
1729
// The GEP inherits the base with the accumulated offset.
1730
1731 ConstantOffsetPtrs[&I] = BaseAndOffset;
1732
1733 return true;
1734}
1735
// Model a GEP: free when it folds (inbounds with known base, or all-constant
// indices), in which case SROA candidacy is propagated; otherwise SROA is
// disabled for the pointer and isGEPFree decides the cost.
1736bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
1737 auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
1738
1739
// A GEP whose every index is (simplified to) a constant has a constant
// offset.
1740 auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
1741 for (const Use &Op : GEP.indices())
1742 if (!getDirectOrSimplifiedValue(Op))
1743 return false;
1744 return true;
1745 };
1746
// NOTE(review): lines 1747-1748 (the guard before this early return,
// presumably a simplifyInstruction(I) attempt) are missing from this copy.
1749 return true;
1750
1751 if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
1752 if (SROAArg)
1753 SROAArgValues[&I] = SROAArg;
1754
1755
1756 return true;
1757 }
1758
// Non-foldable GEP defeats SROA for the underlying alloca.
1759
1760 if (SROAArg)
1761 disableSROAForArg(SROAArg);
1762 return isGEPFree(I);
1763}
1764
1765
1766
1767
// For a directly recursive call site, try to fold a compare on a function
// argument using the branch condition dominating the call: if the branch
// guarding the recursive call pins the compare's outcome and the actual call
// argument differs from the formal, the compare inside the inlined body is
// known.
1768bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
1769
// NOTE(review): line 1770 (the guard condition before this early return) is
// missing from this copy.
1771 return false;
1772 auto *CmpOp = Cmp.getOperand(0);
// Only applies when the candidate call is directly recursive.
1773
1774 if (CandidateCall.getCaller() != &F)
1775 return false;
1776
// The call block must be guarded by a single conditional branch on this
// exact compare.
1777 auto *CallBB = CandidateCall.getParent();
1778 auto *Predecessor = CallBB->getSinglePredecessor();
1779 if (!Predecessor)
1780 return false;
// NOTE(review): line 1782 (the dyn_cast of the predecessor's terminator to
// BranchInst defining Br) is missing from this copy.
1783 if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
1784 return false;
1785
1786
1787
// Find a formal argument that is the compared operand while the matching
// actual argument is a different value.
1788 bool ArgFound = false;
1789 Value *FuncArg = nullptr, *CallArg = nullptr;
1790 for (unsigned ArgNum = 0;
1791 ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) {
1792 FuncArg = F.getArg(ArgNum);
// NOTE(review): line 1793 (the assignment of CallArg from the call's
// argument list) is missing from this copy.
1794 if (FuncArg == CmpOp && CallArg != CmpOp) {
1795 ArgFound = true;
1796 break;
1797 }
1798 }
1799 if (!ArgFound)
1800 return false;
1801
1802
1803
// Re-evaluate the compare under the branch's condition context; Invert
// captures which successor leads to the call block.
1805 CondContext CC(&Cmp);
1806 CC.Invert = (CallBB != Br->getSuccessor(0));
1807 SQ.CC = &CC;
1808 CC.AffectedValues.insert(FuncArg);
// NOTE(review): lines 1809-1811 (the simplification call producing ConstVal
// and its dyn_cast) are missing from this copy.
1812
1813
// The recursive-call path fixes the compare's value: record it only when
// the simplified result contradicts the branch direction taken.
1814 if ((ConstVal->isOne() && CC.Invert) ||
1815 (ConstVal->isZero() && !CC.Invert)) {
1816 SimplifiedValues[&Cmp] = ConstVal;
1817 return true;
1818 }
1819 }
1820 return false;
1821}
1822
1823
// Fold an instruction whose operands are all (simplified to) constants by
// constant-folding it; on success the result is recorded in SimplifiedValues.
1824bool CallAnalyzer::simplifyInstruction(Instruction &I) {
// NOTE(review): line 1825 (the operand-constant vector declaration) is
// missing from this copy.
1826 for (Value *Op : I.operands()) {
1827 Constant *COp = getDirectOrSimplifiedValue(Op);
1828 if (!COp)
1829 return false;
// NOTE(review): line 1830 (pushing COp into the operand vector) and line
// 1832 (the ConstantFoldInstOperands call defining C) are missing from this
// copy, and the !C condition below lost its operand.
1831 }
1833 if ()
1834 return false;
1835 SimplifiedValues[&I] = C;
1836 return true;
1837}
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
// Fold llvm.is.constant: it evaluates to 1 exactly when its argument has
// been resolved to a constant at this point of the analysis, else 0.
1848bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
// NOTE(review): line 1849 (the Arg = CB.getArgOperand(0) definition) and
// line 1852 (the RT return-type definition) are missing from this copy.
1850 auto *C = getDirectOrSimplifiedValue(Arg);
1851
1853 SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1854 return true;
1855}
1856
// Fold llvm.objectsize when the object size can be computed; the constant
// result (if any) is recorded and its presence is the return value.
1857bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
1858
1859
// NOTE(review): line 1860 (the guard condition before this early return)
// and lines 1863/1865 (the lowerObjectSizeCall invocation whose trailing
// argument 'true' survives below) are missing from this copy.
1861 return false;
1862
1864 true);
1866 if (C)
1867 SimplifiedValues[&CB] = C;
1868 return C;
1869}
1870
// Bitcasts are always free; they also transparently propagate both the
// constant-offset pair and the SROA candidacy of their operand.
1871bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1872
// NOTE(review): line 1873 (the simplifyInstruction(I) guard before this
// early return) is missing from this copy.
1874 return true;
1875
1876
// Propagate any known (base, offset) through the cast.
1877 std::pair<Value *, APInt> BaseAndOffset =
1878 ConstantOffsetPtrs.lookup(I.getOperand(0));
1879
1880 if (BaseAndOffset.first)
1881 ConstantOffsetPtrs[&I] = BaseAndOffset;
1882
1883
// Propagate SROA candidacy.
1884 if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1885 SROAArgValues[&I] = SROAArg;
1886
1887
1888 return true;
1889}
1890
// Model ptrtoint: when the integer is wide enough to hold the pointer, the
// constant-offset pair survives the cast; SROA candidacy is propagated
// unconditionally.
1891bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1892
// NOTE(review): line 1893 (the simplifyInstruction(I) guard before this
// early return) and line 1898 (the IntegerSize definition used below) are
// missing from this copy.
1894 return true;
1895
1896
1897
1899 unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
// Only a lossless cast preserves the (base, offset) information.
1900 if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1901 std::pair<Value *, APInt> BaseAndOffset =
1902 ConstantOffsetPtrs.lookup(I.getOperand(0));
1903 if (BaseAndOffset.first)
1904 ConstantOffsetPtrs[&I] = BaseAndOffset;
1905 }
1906
1907
1908
1909
1910
1911
1912
1913
1914 if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1915 SROAArgValues[&I] = SROAArg;
1916
// NOTE(review): lines 1917-1918 (the final return, presumably keyed on the
// target's free-cast query) are missing from this copy.
1919}
1920
// Model inttoptr: mirror of visitPtrToInt — a non-truncating cast preserves
// the (base, offset) pair, and SROA candidacy follows the operand.
1921bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1922
// NOTE(review): line 1923 (the simplifyInstruction(I) guard before this
// early return) is missing from this copy.
1924 return true;
1925
1926
1927
1928 Value *Op = I.getOperand(0);
1929 unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
// The integer must not be wider than the pointer for the offset to be
// meaningful.
1930 if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1931 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1932 if (BaseAndOffset.first)
1933 ConstantOffsetPtrs[&I] = BaseAndOffset;
1934 }
1935
1936
1937 if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1938 SROAArgValues[&I] = SROAArg;
1939
// NOTE(review): lines 1940-1941 (the final return) are missing from this
// copy.
1942}
1943
// Generic cast handler (for casts without a dedicated visitor): the operand
// can no longer be SROA'd, and floating-point conversions may incur a call
// penalty on targets without FP support.
1944bool CallAnalyzer::visitCastInst(CastInst &I) {
1945
// NOTE(review): line 1946 (the simplifyInstruction(I) guard before this
// early return) is missing from this copy.
1947 return true;
1948
1949
1950
1951 disableSROA(I.getOperand(0));
1952
1953
1954
// FP casts charge a call penalty when the condition on the (missing) line
// 1963 holds — upstream gates this on the target lacking FP arithmetic.
1955
1956 switch (I.getOpcode()) {
1957 case Instruction::FPTrunc:
1958 case Instruction::FPExt:
1959 case Instruction::UIToFP:
1960 case Instruction::SIToFP:
1961 case Instruction::FPToUI:
1962 case Instruction::FPToSI:
1964 onCallPenalty();
1965 break;
1966 default:
1967 break;
1968 }
1969
// NOTE(review): lines 1970-1971 (the final return) are missing from this
// copy.
1972}
1973
1974bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1975 return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1976}
1977
// Determine whether \p V is known to be non-null once inlined into the
// callee: either via a nonnull attribute on the corresponding call-site
// argument, or because it is derived from an alloca (which is never null).
1978bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1979
1980
1981
1982
1983
// NOTE(review): line 1984 (the dyn_cast of V to Argument defining A) is
// missing from this copy.
1985 if (paramHasAttr(A, Attribute::NonNull))
1986 return true;
1987
1988
1989
// Alloca-derived pointers cannot be null.
1990
1991 if (isAllocaDerivedArg(V))
1992
1993
1994
1995 return true;
1996
1997 return false;
1998}
1999
// Decide whether inlining this call site is allowed to grow code size at
// all; when false the threshold is forced to zero by updateThreshold.
2000bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
// NOTE(review): lines 2016-2017 and 2019 (the two conditions guarding these
// early returns — upstream checks for a noreturn/unreachable-terminated
// block and a cold call site marker) are missing from this copy; verify
// against upstream.
2018 return false;
2020 return false;
2021
2022 return true;
2023}
2024
// Classify the call site as cold: prefer profile summary data when present,
// otherwise fall back to comparing the call block's frequency against a
// fraction of the caller's entry frequency.
2025bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
2026 BlockFrequencyInfo *CallerBFI) {
2027
// Profile-based answer wins when a profile summary exists.
2028
2029 if (PSI && PSI->hasProfileSummary())
2030 return PSI->isColdCallSite(Call, CallerBFI);
2031
2032
// Without BFI there is no basis for a frequency judgement.
2033 if (!CallerBFI)
2034 return false;
2035
2036
2037
2038
2039
// NOTE(review): lines 2040-2041 (the ColdProb branch-probability built from
// ColdCallSiteRelFreq and the CallSiteBB definition) are missing from this
// copy; both are used below.
2042 auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
2043 auto CallerEntryFreq =
2045 return CallSiteFreq < CallerEntryFreq * ColdProb;
2046}
2047
// Return the boosted threshold for a hot call site, if this call site
// qualifies: profile-summary hotness first, then a local block-frequency
// heuristic; std::nullopt means "not hot".
2048std::optional
2049InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
2050 BlockFrequencyInfo *CallerBFI) {
2051
2052
2053
2054 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
// NOTE(review): line 2055 (returning Params.HotCallSiteThreshold) and line
// 2059 (the guard before the next return, presumably requiring CallerBFI)
// are missing from this copy.
2056
2057
2058
2060 return std::nullopt;
2061
2062
2063
2064
2065
// Locally hot: the call block runs much more often than the caller's entry.
// NOTE(review): line 2066 (the CallSiteBB definition) and line 2069 (the
// Limit computation from HotCallSiteRelFreq) are missing from this copy.
2067 BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
2068 BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq();
2070 if (Limit && CallSiteFreq >= *Limit)
// NOTE(review): line 2071 (returning Params.LocallyHotCallSiteThreshold) is
// missing from this copy.
2072
2073
2074 return std::nullopt;
2075}
2076
// Compute the effective inlining threshold for this call site, combining the
// base threshold with size attributes, hint/hot/cold adjustments, and the
// single-basic-block / vector bonuses; also applies the last-call-to-static
// bonus as a cost discount.
2077void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// If no size growth is allowed, nothing can be inlined that isn't free.
2078
2079 if (!allowSizeGrowth(Call)) {
2080 Threshold = 0;
2081 return;
2082 }
2083
// NOTE(review): line 2084 (the Caller definition, Call.getCaller()) is
// missing from this copy; Caller is used throughout below.
2085
2086
// Helpers to clamp the threshold against optional parameter values.
2087 auto MinIfValid = [](int A, std::optional B) {
2088 return B ? std::min(A, *B) : A;
2089 };
2090
2091
2092 auto MaxIfValid = [](int A, std::optional B) {
2093 return B ? std::max(A, *B) : A;
2094 };
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108 int SingleBBBonusPercent = 50;
// NOTE(review): lines 2109-2110 (VectorBonusPercent and
// LastCallToStaticBonus initializers) are missing from this copy; both are
// used below.
2111
2112
2113 auto DisallowAllBonuses = [&]() {
2114 SingleBBBonusPercent = 0;
2115 VectorBonusPercent = 0;
2116 LastCallToStaticBonus = 0;
2117 };
2118
2119
// minsize callers get the harshest treatment; optsize callers a milder one.
2120
2121 if (Caller->hasMinSize()) {
// NOTE(review): line 2122 (the Threshold clamp for minsize) and line 2130
// (the optsize clamp) are missing from this copy.
2123
2124
2125
2126
2127 SingleBBBonusPercent = 0;
2128 VectorBonusPercent = 0;
2129 } else if (Caller->hasOptSize())
2131
2132
// Hint/hot/cold adjustments only apply when the caller is not minsize.
// NOTE(review): the negation/operand of this condition was lost in this
// copy (upstream reads !Caller->hasMinSize()).
2133
2134 if (->hasMinSize()) {
2135 if (Callee.hasFnAttribute(Attribute::InlineHint))
2136 Threshold = MaxIfValid(Threshold, Params.HintThreshold);
2137
2138
2139
2140
2141
2142
2143
2144
2145 BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
// NOTE(review): lines 2146-2148 and 2153-2155 (the hot-call-site threshold
// application and the cold-call-site check) are missing from this copy;
// only the cold-path bonus disabling survives below.
2149
2150
2151
2152
2156
2157
2158
2159
2160 DisallowAllBonuses();
// NOTE(review): line 2161 (the ColdCallSiteThreshold clamp) is missing.
2162 } else if (PSI) {
2163
2164
// Entry-count-based hotness/coldness of the callee function itself.
2165 if (PSI->isFunctionEntryHot(&Callee)) {
2166
2167
2168
2169
2170 Threshold = MaxIfValid(Threshold, Params.HintThreshold);
2171 } else if (PSI->isFunctionEntryCold(&Callee)) {
2172
2173
2174
2175
2176
2177 DisallowAllBonuses();
2178 Threshold = MinIfValid(Threshold, Params.ColdThreshold);
2179 }
2180 }
2181 }
2182
// NOTE(review): lines 2183 and 2187 (the user-threshold override via
// Call.getFnAttr and the final clamp) are missing from this copy.
2184
2185
2186
2188
// Bonuses are percentages of the final threshold.
2189 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2190 VectorBonus = Threshold * VectorBonusPercent / 100;
2191
2192
// Inlining the sole call to an internal function lets the function body be
// deleted, which is credited as a cost discount.
2193
2194
2195 if (isSoleCallToLocalFunction(Call, F)) {
2196 addCost(-LastCallToStaticBonus);
2197 StaticBonusApplied = LastCallToStaticBonus;
2198 }
2199}
2200
// Model an integer/pointer compare: try constant folding, the recursive-call
// special case, folding of same-base pointer compares, and null-compare
// elimination for known-non-null callee pointers.
2201bool CallAnalyzer::visitCmpInst(CmpInst &I) {
2202 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2203
// NOTE(review): line 2204 (the simplifyInstruction(I) guard before this
// early return) is missing from this copy.
2205 return true;
2206
2207
2208 if (simplifyCmpInstForRecCall(I))
2209 return true;
2210
// Everything below reasons about integer/pointer semantics only.
2211 if (I.getOpcode() == Instruction::FCmp)
2212 return false;
2213
2214
// Two pointers with the same tracked base compare by their offsets.
2215
2216 Value *LHSBase, *RHSBase;
2217 APInt LHSOffset, RHSOffset;
2218 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
2219 if (LHSBase) {
2220 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
2221 if (RHSBase && LHSBase == RHSBase) {
2222
2223
// NOTE(review): line 2224 (the start of the SimplifiedValues assignment
// from ConstantInt::getBool/ICmpInst::compare) and line 2226 are missing
// from this copy.
2225 I.getType(),
2227 ++NumConstantPtrCmps;
2228 return true;
2229 }
2230 }
2231
// A compare used only by make.implicit branches will be folded into an
// implicit null check.
// NOTE(review): line 2234 (the dyn_cast of User to BranchInst) is missing
// and the condition below lost its operand (upstream reads
// !BI->getMetadata(...)).
2232 auto isImplicitNullCheckCmp = [](const CmpInst &I) {
2233 for (auto *User : I.users())
2235 if (->getMetadata(LLVMContext::MD_make_implicit))
2236 return false;
2237 return true;
2238 };
2239
2240
2241
// NOTE(review): line 2242 (the guard checking RHS is a null constant) is
// missing from this copy.
2243 if (isKnownNonNullInCallee(I.getOperand(0))) {
// NOTE(review): lines 2244-2246 (recording the folded true/false result)
// are missing from this copy.
2247 return true;
2248 }
2249
2250
2251 if (isImplicitNullCheckCmp(I))
2252 return true;
2253 }
// NOTE(review): line 2254 (the final return, upstream delegates to the
// base visitor) is missing from this copy.
2255}
2256
// Model a subtraction: pointer differences over the same tracked base fold
// to the difference of their constant offsets; everything else falls through
// to the generic binary-operator handling.
2257bool CallAnalyzer::visitSub(BinaryOperator &I) {
2258
2259
2260 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2261 Value *LHSBase, *RHSBase;
2262 APInt LHSOffset, RHSOffset;
2263 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
2264 if (LHSBase) {
2265 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
2266 if (RHSBase && LHSBase == RHSBase) {
2267
2268
// NOTE(review): lines 2269-2271 (constructing the constant C from the
// offset difference) are missing from this copy.
2272 SimplifiedValues[&I] = C;
2273 ++NumConstantPtrDiffs;
2274 return true;
2275 }
2276 }
2277 }
2278
2279
2280
2281 return Base::visitSub(I);
2282}
2283
// Model a generic binary operator: try to simplify with constant operands
// (using fast-math flags for FP ops); otherwise the operands lose SROA
// candidacy, and FP arithmetic may charge a call penalty.
2284bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
2285 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2286 Constant *CLHS = getDirectOrSimplifiedValue(LHS);
2287 Constant *CRHS = getDirectOrSimplifiedValue(RHS);
2288
2289 Value *SimpleV = nullptr;
// NOTE(review): line 2290 (the dyn_cast of &I to FPMathOperator defining
// FI) and line 2295 (the non-FP simplifyBinOp call) are missing from this
// copy.
2291 SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2292 FI->getFastMathFlags(), DL);
2293 else
2294 SimpleV =
// NOTE(review): line 2297 (the dyn_cast of SimpleV to Constant defining C)
// is missing from this copy.
2296
2298 SimplifiedValues[&I] = C;
2299
2300 if (SimpleV)
2301 return true;
2302
2303
// Operands of an unsimplified binop can escape SROA's reach.
2304 disableSROA(LHS);
2305 disableSROA(RHS);
2306
2307
2308
// FP ops (other than fneg patterns) charge a call penalty when the target
// condition on the (missing) lines 2312-2313 holds.
2309
2310 using namespace llvm::PatternMatch;
2311 if (I.getType()->isFloatingPointTy() &&
2314 onCallPenalty();
2315
2316 return false;
2317}
2318
// Model fneg: try simplification with a constant operand; failing that, the
// operand loses SROA candidacy.
2319bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2320 Value *Op = I.getOperand(0);
2321 Constant *COp = getDirectOrSimplifiedValue(Op);
2322
// NOTE(review): lines 2323-2324 (the simplifyFNegInst call defining
// SimpleV) and line 2326 (the dyn_cast of SimpleV to Constant defining C)
// are missing from this copy.
2325
2327 SimplifiedValues[&I] = C;
2328
2329 if (SimpleV)
2330 return true;
2331
2332
2333 disableSROA(Op);
2334
2335 return false;
2336}
2337
2338bool CallAnalyzer::visitLoad(LoadInst &I) {
2339 if (handleSROA(I.getPointerOperand(), I.isSimple()))
2340 return true;
2341
2342
2343
2344
2345 if (EnableLoadElimination &&
2346 !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2347 onLoadEliminationOpportunity();
2348 return true;
2349 }
2350
2351 onMemAccess();
2352 return false;
2353}
2354
2355bool CallAnalyzer::visitStore(StoreInst &I) {
2356 if (handleSROA(I.getPointerOperand(), I.isSimple()))
2357 return true;
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367 disableLoadElimination();
2368
2369 onMemAccess();
2370 return false;
2371}
2372
// Model extractvalue: if the aggregate operand has a simplified value, try
// to fold the extraction; otherwise fall through to the generic cost model.
2373bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2374 Value *Op = I.getAggregateOperand();
2375
2376
2377
2378 if (Value *SimpleOp = getSimplifiedValueUnchecked(Op)) {
2379 SimplifyQuery SQ(DL);
// NOTE(review): line 2380 (the simplifyExtractValueInst call defining
// SimpleV) is missing from this copy.
2381 if (SimpleV) {
2382 SimplifiedValues[&I] = SimpleV;
2383 return true;
2384 }
2385 }
2386
2387
2388 return Base::visitExtractValue(I);
2389}
2390
// Model insertvalue: free if it simplifies, otherwise generic handling.
2391bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2392
// NOTE(review): line 2393 (the simplification guard before this early
// return) is missing from this copy.
2394 return true;
2395
2396
2397 return Base::visitInsertValue(I);
2398}
2399
2400
2401
2402
2403
2404
2405
// Try to constant-fold a call to \p F: all arguments must be (simplified
// to) constants, and the fold result is recorded for the call instruction.
2406bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2407
2408
2409
2410
// NOTE(review): line 2411 (the canConstantFoldCallTo guard) is missing
// from this copy.
2412 return false;
2413
2414
// NOTE(review): lines 2415-2417 (the argument-constant vector and loop
// header) and lines 2422/2424 (pushing each constant and the
// ConstantFoldCall invocation defining C) are missing from this copy.
2418 Constant *C = getDirectOrSimplifiedValue(I);
// NOTE(review): the !C condition below lost its operand in this copy.
2419 if ()
2420 return false;
2421
2423 }
2425 SimplifiedValues[&Call] = C;
2426 return true;
2427 }
2428
2429 return false;
2430}
2431
// Decide whether a call to \p F will actually lower to a call instruction.
// Fortified mem* routines whose constant length fits the constant object
// size lower to intrinsics (not calls) and are reported as such.
2432bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
2433 const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
2434 LibFunc LF;
// Unknown/unavailable library functions get the default TTI answer.
// NOTE(review): line 2436 (the default return, upstream delegates to
// TargetTransformInfoImplBase::isLoweredToCall) and line 2462 (the final
// return) are missing from this copy.
2435 if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
2437
2438 switch (LF) {
2439 case LibFunc_memcpy_chk:
2440 case LibFunc_memmove_chk:
2441 case LibFunc_mempcpy_chk:
2442 case LibFunc_memset_chk: {
2443
2444
2445
2446
2447
2448
// When length <= object size the fortified call folds to the plain
// intrinsic, so no call remains.
2449 auto *LenOp = getDirectOrSimplifiedValue(Call.getOperand(2));
2450 auto *ObjSizeOp =
2451 getDirectOrSimplifiedValue(Call.getOperand(3));
2452 if (LenOp && ObjSizeOp &&
2453 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2454 return false;
2455 }
2456 break;
2457 }
2458 default:
2459 break;
2460 }
2461
2463}
2464
// Model a nested call site inside the callee: flags returns-twice /
// noduplicate / uninlineable intrinsics, simplifies intrinsic calls where
// possible, detects recursion, and otherwise charges the call through the
// cost-model callbacks.
2465bool CallAnalyzer::visitCallBase(CallBase &Call) {
2466 if (!onCallBaseVisitStart(Call))
2467 return true;
2468
// Calls to returns-twice functions (setjmp-like) block some optimizations.
// NOTE(review): line 2469 (the start of this condition, checking the called
// function) is missing from this copy.
2470 .hasFnAttribute(Attribute::ReturnsTwice)) {
2471
2472 ExposesReturnsTwice = true;
2473 return false;
2474 }
// NOTE(review): line 2475 (the noduplicate check guarding the assignment
// below) and line 2478 (the inline-asm operand extraction defining
// InlineAsmOp) are missing from this copy.
2476 ContainsNoDuplicateCall = true;
2477
2479 onInlineAsm(*InlineAsmOp);
2480
// Indirect calls may become direct if the callee operand simplifies to a
// known function.
// NOTE(review): line 2481 (the F = Call.getCalledFunction() definition) is
// missing and the IsIndirectCall initializer below lost its expression
// (upstream: !F).
2482 bool IsIndirectCall = ;
2483 if (IsIndirectCall) {
2484
2485
// NOTE(review): line 2486 (the Callee operand definition) and lines
// 2488/2491 (the !F guard and the brace structure around the fallback) are
// missing from this copy.
2487 F = getSimplifiedValue(Callee);
2489 onCallArgumentSetup(Call);
2490
2492 disableLoadElimination();
2493 return Base::visitCallBase(Call);
2494 }
2495 }
2496
2497 assert(F && "Expected a call to a known function");
2498
// A fully constant-foldable call is free.
2499
2500 if (simplifyCallSite(F, Call))
2501 return true;
2502
2503
2504
// Intrinsics get bespoke handling.
// NOTE(review): line 2505 (the dyn_cast of &Call to IntrinsicInst defining
// II) and line 2508 (the memory-access check before disabling load
// elimination in the default case) are missing from this copy.
2506 switch (II->getIntrinsicID()) {
2507 default:
2509 disableLoadElimination();
2510 return Base::visitCallBase(Call);
2511
2512 case Intrinsic::load_relative:
2513 onLoadRelativeIntrinsic();
2514 return false;
2515
2516 case Intrinsic::memset:
2517 case Intrinsic::memcpy:
2518 case Intrinsic::memmove:
2519 disableLoadElimination();
2520
2521 return false;
// These intrinsics make the surrounding function uninlinable.
2522 case Intrinsic::icall_branch_funnel:
2523 case Intrinsic::localescape:
2524 HasUninlineableIntrinsic = true;
2525 return false;
2526 case Intrinsic::vastart:
2527 InitsVargArgs = true;
2528 return false;
// Invariant-group intrinsics are transparent for SROA purposes.
2529 case Intrinsic::launder_invariant_group:
2530 case Intrinsic::strip_invariant_group:
2531 if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2532 SROAArgValues[II] = SROAArg;
2533 return true;
2534 case Intrinsic::is_constant:
2535 return simplifyIntrinsicCallIsConstant(Call);
2536 case Intrinsic::objectsize:
2537 return simplifyIntrinsicCallObjectSize(Call);
2538 }
2539 }
2540
// Recursion inside the callee.
// NOTE(review): line 2541 (the condition comparing F against the candidate
// call's caller/function) is missing from this copy.
2542
2543
2544 IsRecursiveCall = true;
2545 if (!AllowRecursiveCall)
2546 return false;
2547 }
2548
2549 if (isLoweredToCall(F, Call)) {
2550 onLoweredCall(F, Call, IsIndirectCall);
2551 }
2552
// NOTE(review): line 2553 (the memory-access condition guarding this
// disableLoadElimination) is missing from this copy.
2554 disableLoadElimination();
2555 return Base::visitCallBase(Call);
2556}
2557
2558bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2559
2560 bool Free = !HasReturn;
2561 HasReturn = true;
2562 return Free;
2563}
2564
// Model a branch: free when unconditional, when its condition is (simplified
// to) a constant, or when it carries make.implicit metadata (it becomes an
// implicit null check).
2565bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2566
2567
2568
2569
// NOTE(review): line 2570 (the start of the return expression, upstream
// "return BI.isUnconditional() ||") is missing from this copy.
2571 getDirectOrSimplifiedValue(BI.getCondition()) ||
2572 BI.getMetadata(LLVMContext::MD_make_implicit);
2573}
2574
// Model a select: fold when the condition is constant or both arms agree;
// for pointer selects, propagate matching (base, offset) pairs and SROA
// candidacy of the selected arm.
2575bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2576 bool CheckSROA = SI.getType()->isPointerTy();
// NOTE(review): lines 2577-2578 (the TrueVal/FalseVal operand definitions)
// are missing from this copy.
2579
2580 Constant *TrueC = getDirectOrSimplifiedValue(TrueVal);
2581 Constant *FalseC = getDirectOrSimplifiedValue(FalseVal);
2582 Constant *CondC = getSimplifiedValue(SI.getCondition());
2583
2584 if (!CondC) {
// Unknown condition: the select still folds if both arms are the same
// constant, or share the same tracked (base, offset).
2585
2586 if (TrueC == FalseC && TrueC) {
2587 SimplifiedValues[&SI] = TrueC;
2588 return true;
2589 }
2590
2591 if (!CheckSROA)
2592 return Base::visitSelectInst(SI);
2593
2594 std::pair<Value *, APInt> TrueBaseAndOffset =
2595 ConstantOffsetPtrs.lookup(TrueVal);
2596 std::pair<Value *, APInt> FalseBaseAndOffset =
2597 ConstantOffsetPtrs.lookup(FalseVal);
2598 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2599 ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2600
2601 if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2602 SROAArgValues[&SI] = SROAArg;
2603 return true;
2604 }
2605
2606 return Base::visitSelectInst(SI);
2607 }
2608
// Known condition: pick the selected arm.
// NOTE(review): lines 2610-2611 (the start of the SelectedV ternary over
// CondC) are missing from this copy.
2609
2612 : nullptr;
2613 if (!SelectedV) {
// Vector selects with constant arms can still be folded elementwise.
2614
2615
2616
2617 if (TrueC && FalseC) {
// NOTE(review): line 2618 (the ConstantExpr::getSelect call defining C) is
// missing from this copy.
2619 SimplifiedValues[&SI] = C;
2620 return true;
2621 }
2622 }
2623 return Base::visitSelectInst(SI);
2624 }
2625
2626
// NOTE(review): line 2627 (the dyn_cast of SelectedV to Constant guarding
// this block) is missing from this copy.
2628 SimplifiedValues[&SI] = SelectedC;
2629 return true;
2630 }
2631
2632 if (!CheckSROA)
2633 return true;
2634
// The select collapses to the selected pointer: inherit its offset info
// and SROA candidacy.
2635 std::pair<Value *, APInt> BaseAndOffset =
2636 ConstantOffsetPtrs.lookup(SelectedV);
2637 if (BaseAndOffset.first) {
2638 ConstantOffsetPtrs[&SI] = BaseAndOffset;
2639
2640 if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2641 SROAArgValues[&SI] = SROAArg;
2642 }
2643
2644 return true;
2645}
2646
// Model a switch: free when the condition folds to a constant; otherwise
// estimate its lowering (jump table vs. case clusters) and charge via
// onFinalizeSwitch.
2647bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2648
2649
2650 if (getDirectOrSimplifiedValue(SI.getCondition()))
2651 return true;
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665 unsigned JumpTableSize = 0;
2666 BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2667 unsigned NumCaseCluster =
// NOTE(review): line 2668 (the TTI.getEstimatedNumberOfCaseClusters call
// initializing NumCaseCluster and JumpTableSize) is missing from this copy.
2669
2670 onFinalizeSwitch(JumpTableSize, NumCaseCluster, SI.defaultDestUnreachable());
2671 return false;
2672}
2673
// Model an indirect branch: it is never simplified here and its presence
// makes the function uninlinable (flagged via HasIndirectBr, checked in
// analyzeBlock), so the instruction itself is just charged normally.
2674bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2675
2676
2677
2678
2679
2680
2681
2682
2683 HasIndirectBr = true;
2684 return false;
2685}
2686
// A resume is never free.
2687bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2688
2689
2690 return false;
2691}
2692
// A cleanupret is never free.
2693bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2694
2695
2696 return false;
2697}
2698
// A catchret is never free.
2699bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2700
2701
2702 return false;
2703}
2704
// An unreachable terminator generates no code and is treated as free.
2705bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2706
2707
2708
2709 return true;
2710}
2711
// Fallback visitor for all other instructions: attempt simplification, and
// failing that pessimistically disable SROA for every operand.
2712bool CallAnalyzer::visitInstruction(Instruction &I) {
2713
2714
// NOTE(review): lines 2715-2716 (the simplification guard before this
// early return) are missing from this copy.
2717 return true;
2718
2719
2720
2721 for (const Use &Op : I.operands())
2722 disableSROA(Op);
2723
2724 return false;
2725}
2726
2727
2728
2729
2730
2731
2732
2733
// Analyze every instruction in \p BB: skip debug/pseudo and ephemeral
// values, visit the rest through the cost callbacks, and abort early with a
// failure InlineResult when an uninlinable construct or budget overrun is
// found.
2734InlineResult
2735CallAnalyzer::analyzeBlock(BasicBlock *BB,
2736 const SmallPtrSetImpl<const Value *> &EphValues) {
2737 for (Instruction &I : *BB) {
2738
2739
2740
2741
2742
2743
2744
2745
// Debug/pseudo instructions generate no code.
2746 if (I.isDebugOrPseudoInst())
2747 continue;
2748
2749
// NOTE(review): line 2750 (the EphValues.count(&I) check guarding this
// continue) is missing from this copy.
2751 continue;
2752
2753 ++NumInstructions;
// NOTE(review): line 2754 (the vector-type check guarding the counter
// below) is missing from this copy.
2755 ++NumVectorInstructions;
2756
2757
2758
2759
2760
2761
2762 onInstructionAnalysisStart(&I);
2763
// Base::visit dispatches to the visit* handlers above; true means the
// instruction was modeled as free/simplified.
2764 if (Base::visit(&I))
2765 ++NumInstructionsSimplified;
2766 else
2767 onMissedSimplification();
2768
2769 onInstructionAnalysisFinish(&I);
2770 using namespace ore;
2771
// Bail out entirely on constructs that make inlining invalid.
// NOTE(review): lines 2772 and 2774-2784 (the InlineResult IR definition
// and the failure-reason strings for each flag) are partially missing from
// this copy, and the .isSuccess() condition below lost its operand.
2773 if (IsRecursiveCall && !AllowRecursiveCall)
2775 else if (ExposesReturnsTwice)
2777 else if (HasDynamicAlloca)
2779 else if (HasIndirectBr)
2781 else if (HasUninlineableIntrinsic)
2783 else if (InitsVargArgs)
2785 if (.isSuccess()) {
2786 if (ORE)
2787 ORE->emit([&]() {
2788 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2789 &CandidateCall)
2790 << NV("Callee", &F) << " has uninlinable pattern ("
2791 << NV("InlineResult", IR.getFailureReason())
2792 << ") and cost is not fully computed";
2793 });
2794 return IR;
2795 }
2796
// Recursive callees that already use a lot of stack are rejected to avoid
// stack overflow from inlining-driven stack growth.
2797
2798
2799
// NOTE(review): lines 2800 and 2802 (the recursion/stack-size condition
// and the failure InlineResult construction) are missing from this copy.
2801 auto IR =
2803 if (ORE)
2804 ORE->emit([&]() {
2805 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2806 &CandidateCall)
2807 << NV("Callee", &F) << " is "
2808 << NV("InlineResult", IR.getFailureReason())
2809 << ". Cost is not fully computed";
2810 });
2811 return IR;
2812 }
2813
2814 if (shouldStop())
// NOTE(review): line 2815 (the InlineResult::failure( start of this return)
// is missing from this copy.
2816 "Call site analysis is not favorable to inlining.");
2817 }
2818
// NOTE(review): line 2819 (the success return) is missing from this copy.
2820}
2821
2822
2823
2824
2825
2826
2827
// Strip inbounds GEPs (and non-interposable alias indirections) off \p V,
// accumulating their constant offset; on success V is rewritten to the
// underlying base and the total offset is returned as a ConstantInt.
2828ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
// NOTE(review): the negation/operand of this condition was lost in this
// copy (upstream reads !V->getType()->isPointerTy()).
2829 if (->getType()->isPointerTy())
2830 return nullptr;
2831
2832 unsigned AS = V->getType()->getPointerAddressSpace();
2833 unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
// NOTE(review): line 2834 (the zero-initialized APInt Offset accumulator)
// is missing from this copy.
2835
2836
2837
// Visited guards against cycles through aliases/GEP chains.
2838 SmallPtrSet<Value *, 4> Visited;
// NOTE(review): line 2839 (Visited.insert(V)) and lines 2841/2845 (the
// dyn_casts of V to GEPOperator and GlobalAlias) are missing from this
// copy; the first condition below also lost its GEP-> operand.
2840 do {
2842 if (->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2843 return nullptr;
2844 V = GEP->getPointerOperand();
2846 if (GA->isInterposable())
2847 break;
2848 V = GA->getAliasee();
2849 } else {
2850 break;
2851 }
2852 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2853 } while (Visited.insert(V).second);
2854
2855 Type *IdxPtrTy = DL.getIndexType(V->getType());
// NOTE(review): line 2856 (the ConstantInt::get(IdxPtrTy, Offset) return)
// is missing from this copy.
2857}
2858
2859
2860
2861
2862
2863
2864
2865
// After proving that \p CurrBB always branches to \p NextBB, mark every
// other successor (and anything reachable only through dead edges) dead, so
// analyzeBlock never charges for those blocks.
2866void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
// NOTE(review): line 2867 (the IsEdgeDead lambda header taking Pred/Succ)
// is missing from this copy; its body survives below.
2868
// An edge is dead if its source is dead or is known to branch elsewhere.
2869
2870 if (DeadBlocks.count(Pred))
2871 return true;
2872 BasicBlock *KnownSucc = KnownSuccessors[Pred];
2873 return KnownSucc && KnownSucc != Succ;
2874 };
2875
// A block is newly dead when all of its predecessors' edges are dead.
2876 auto IsNewlyDead = [&](BasicBlock *BB) {
2877
2878 return (!DeadBlocks.count(BB) &&
// NOTE(review): line 2879 (the llvm::all_of over predecessors(BB)) is
// missing from this copy.
2880 [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2881 };
2882
// Worklist-propagate deadness through successors of newly dead blocks.
2883 for (BasicBlock *Succ : successors(CurrBB)) {
2884 if (Succ == NextBB || !IsNewlyDead(Succ))
2885 continue;
// NOTE(review): lines 2886-2887 (the NewDead worklist seeded with Succ),
// line 2889 (popping Dead) and line 2894 (pushing S) are missing from this
// copy.
2888 while (!NewDead.empty()) {
2890 if (DeadBlocks.insert(Dead).second)
2891
2892 for (BasicBlock *S : successors(Dead))
2893 if (IsNewlyDead(S))
2895 }
2896 }
2897}
2898
2899
2900
2901
2902
2903
2904
2905
// Main analysis driver: seed simplifications from the call-site arguments,
// then walk the callee CFG block-by-block (following folded branches and
// pruning dead blocks) until done, a bailout triggers, or shouldStop says
// the budget is exhausted.
2906InlineResult CallAnalyzer::analyze() {
2907 ++NumCallsAnalyzed;
2908
2909 auto Result = onAnalysisStart();
// NOTE(review): the operand of this .isSuccess() check and line 2911 (the
// early return of Result) are missing from this copy.
2910 if (.isSuccess())
2912
// Declarations have no body to analyze.
// NOTE(review): line 2914 (the return for the empty-function case) and
// line 2916 (the Caller definition) are missing from this copy.
2913 if (F.empty())
2915
2917
// Detect whether the caller itself is recursive; used by the stack-size
// bailout in analyzeBlock.
2918 for (User *U : Caller->users()) {
// NOTE(review): lines 2919-2920 (the CallBase cast of U and the check that
// it calls Caller) are missing from this copy.
2921 IsCallerRecursive = true;
2922 break;
2923 }
2924 }
2925
// Seed SimplifiedValues/ConstantOffsetPtrs/SROA tracking from the actual
// argument values at the candidate call site.
2926
2927
2928 auto CAI = CandidateCall.arg_begin();
2929 for (Argument &FAI : F.args()) {
// NOTE(review): line 2930 (the isa<Constant> guard) and line 2932 (its
// closing brace with counter grouping) are partially missing from this
// copy.
2931 SimplifiedValues[&FAI] = *CAI;
2933 ++NumConstantArgs;
2934
2935 Value *PtrArg = *CAI;
2936 if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2937 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2938
// Alloca-backed arguments become SROA candidates.
2939
// NOTE(review): line 2940 (the dyn_cast of PtrArg to AllocaInst defining
// SROAArg) is missing from this copy.
2941 SROAArgValues[&FAI] = SROAArg;
2942 onInitializeSROAArg(SROAArg);
2943 EnabledSROAAllocas.insert(SROAArg);
2944 }
2945 }
2946 ++CAI;
2947 }
2948 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2949 NumAllocaArgs = SROAArgValues.size();
2950
// Ephemeral values (assume-only uses) are free; get them from the cache if
// available, otherwise collect them now.
2951
2952
2953 SmallPtrSet<const Value *, 32> EphValuesStorage;
2954 const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2955 if (GetEphValuesCache)
2956 EphValues = &GetEphValuesCache(F).ephValues();
2957 else
// NOTE(review): line 2958 (the CodeMetrics::collectEphemeralValues call
// whose trailing argument survives below) is missing from this copy.
2959 EphValuesStorage);
2960
2961
2962
2963
2964
2965
2966
// Breadth-style worklist over reachable blocks; insertion order is visit
// order, and already-seen blocks are deduplicated by the SetVector.
2967
2968 typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
2969 BBSetVector BBWorklist;
2970 BBWorklist.insert(&F.getEntryBlock());
2971
2972
2973 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2974 if (shouldStop())
2975 break;
2976
// NOTE(review): line 2977 (the BB = BBWorklist[Idx] definition) is missing
// from this copy.
2978 if (BB->empty())
2979 continue;
2980
2981 onBlockStart(BB);
2982
2983
2984
2985
2986
2987
2988
2989
2990
// NOTE(review): lines 2991-2994 (single-successor-callbr handling per
// upstream) are missing from this copy.
2995
2996
2997
2998 InlineResult IR = analyzeBlock(BB, *EphValues);
// NOTE(review): the operand of this .isSuccess() check is missing.
2999 if (.isSuccess())
3000 return IR;
3001
// Follow branches/switches whose condition folded to a constant: only the
// taken successor is enqueued, and the untaken side is marked dead.
// NOTE(review): lines 3002/3006-3008 (the terminator dyn_casts to
// BranchInst defining BI and its condition extraction) and line 3010 (the
// taken-successor selection) are missing from this copy.
3003
3004
3005
3009 if (ConstantInt *SimpleCond = getSimplifiedValue(Cond)) {
3011 BBWorklist.insert(NextBB);
3012 KnownSuccessors[BB] = NextBB;
3013 findDeadBlocks(BB, NextBB);
3014 continue;
3015 }
3016 }
// NOTE(review): lines 3017-3018 (the SwitchInst dyn_cast defining SI and
// its condition extraction) are missing from this copy.
3019 if (ConstantInt *SimpleCond = getSimplifiedValue(Cond)) {
3020 BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
3021 BBWorklist.insert(NextBB);
3022 KnownSuccessors[BB] = NextBB;
3023 findDeadBlocks(BB, NextBB);
3024 continue;
3025 }
3026 }
3027
// Otherwise all successors are potentially live.
3028
3029
3030 BBWorklist.insert_range(successors(BB));
3031
3032 onBlockAnalyzed(BB);
3033 }
3034
// noduplicate calls block inlining unless this is the sole call site.
3035
3036
3037
3038 if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
// NOTE(review): line 3039 (the failure return for the noduplicate case) and
// lines 3044-3047 (the inline-max-stacksize attribute lookup defining
// AttrMaxStackSize/FinalStackSizeThreshold) are missing from this copy.
3040
3041
3042
3043
3048 FinalStackSizeThreshold = *AttrMaxStackSize;
3049 if (AllocatedSize > FinalStackSizeThreshold)
// NOTE(review): line 3050 (the failure return for exceeding the stack-size
// threshold) is missing from this copy.
3051
3052 return finalizeAnalysis();
3053}
3054
// Dump the analyzed function with per-instruction cost annotations followed
// by the analyzer's summary statistics.
3055void InlineCostCallAnalyzer::print(raw_ostream &OS) {
3056#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
// NOTE(review): line 3057 (the InlineCostAnnotationWriter Writer
// construction) and lines 3059-3072 (the DEBUG_PRINT_STAT invocations for
// each statistic) are missing from this copy.
3058 F.print(OS, &Writer);
3073#undef DEBUG_PRINT_STAT
3074}
3075
3076#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3077
3079#endif
3080
3081
3082
3086
3087
3088
3089
3090 auto CalleeTLI = GetTLI(*Callee);
3092 TTI.areInlineCompatible(Caller, Callee)) &&
3093 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
3095 AttributeFuncs::areInlineCompatible(*Caller, *Callee);
3096}
3097
3100 int64_t Cost = 0;
3101 for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
3102 if (Call.isByValArgument(I)) {
3103
3104
3106 unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
3108 unsigned PointerSize = DL.getPointerSizeInBits(AS);
3109
3110 unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
3111
3112
3113
3114
3115
3116
3117
3118 NumStores = std::min(NumStores, 8U);
3119
3121 } else {
3122
3123
3125 }
3126 }
3127
3130
3131 return std::min<int64_t>(Cost, INT_MAX);
3132}
3133
3142 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3143 GetEphValuesCache);
3144}
3145
3152 const InlineParams Params = { 0,
3153 {},
3154 {},
3155 {},
3156 {},
3157 {},
3158 {},
3159 {},
3160 true,
3161 true};
3162
3163 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
3164 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true,
3165 true);
3166 auto R = CA.analyze();
3167 if (!R.isSuccess())
3168 return std::nullopt;
3169 return CA.getCost();
3170}
3171
3178 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3179 PSI, ORE, *Call.getCalledFunction(), Call);
3180 auto R = CFA.analyze();
3181 if (!R.isSuccess())
3182 return std::nullopt;
3183 return CFA.features();
3184}
3185
3189
3190
3191 if (!Callee)
3193
3194
3195
3196
3197
3198 if (Callee->isPresplitCoroutine())
3200
3201
3202
3203
3204
3205
3206 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3207 for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
3208 if (Call.isByValArgument(I)) {
3212 " address space");
3213 }
3214
3215
3216
3217 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
3218 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3220
3222 if (IsViable.isSuccess())
3225 }
3226
3227
3228
3232
3233
3234 if (Caller->hasOptNone())
3236
3237
3238
3239 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3241
3242
3243 if (Callee->isInterposable())
3245
3246
3247 if (Callee->hasFnAttribute(Attribute::NoInline))
3249
3250
3251 if (Call.isNoInline())
3253
3254
3255 if (Callee->hasFnAttribute("loader-replaceable"))
3257
3258 return std::nullopt;
3259}
3260
3269
3270 auto UserDecision =
3272
3273 if (UserDecision) {
3274 if (UserDecision->isSuccess())
3277 }
3278
3281 "Inlining forced by -inline-all-viable-calls");
3282
3284 << "... (caller:" << Call.getCaller()->getName()
3285 << ")\n");
3286
3287 InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3288 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3289 true, false,
3290 GetEphValuesCache);
3292
3294
3295
3296
3297
3298 if (CA.wasDecidedByCostBenefit()) {
3301 CA.getCostBenefitPair());
3302 else
3304 }
3305
3306 if (CA.wasDecidedByCostThreshold())
3308 CA.getStaticBonusApplied());
3309
3310
3314}
3315
3317 bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
3319
3322
3323
3324
3329
3330 for (auto &II : BB) {
3333 continue;
3334
3335
3336 Function *Callee = Call->getCalledFunction();
3337 if (&F == Callee)
3339
3340
3341
3345
3346 if (Callee)
3347 switch (Callee->getIntrinsicID()) {
3348 default:
3349 break;
3350 case llvm::Intrinsic::icall_branch_funnel:
3351
3352
3354 "disallowed inlining of @llvm.icall.branch.funnel");
3355 case llvm::Intrinsic::localescape:
3356
3357
3359 "disallowed inlining of @llvm.localescape");
3360 case llvm::Intrinsic::vastart:
3361
3362
3364 "contains VarArgs initialized with va_start");
3365 }
3366 }
3367 }
3368
3370}
3371
3372
3373
3374
3377
3378
3379
3380
3381
3382
3383
3384
3387 else
3389
3390
3392
3393
3395
3396
3397
3398
3399
3400
3401
3402
3405
3406
3407
3409
3410
3411
3412
3413
3414
3415
3416
3417
3422 } else if (ColdThreshold.getNumOccurrences() > 0) {
3424 }
3425 return Params;
3426}
3427
3431
3432
3433
3435 unsigned SizeOptLevel) {
3436 if (OptLevel > 2)
3438 if (SizeOptLevel == 1)
3440 if (SizeOptLevel == 2)
3443}
3444
3446 auto Params =
3448
3449
3450
3451 if (OptLevel > 2)
3453 return Params;
3454}
3455
3463 };
3464
3469
3470
3471
3472
3473
3474
3480 if (!CalledFunction || CalledFunction->isDeclaration())
3481 continue;
3483 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI,
3484 GetAssumptionCache, nullptr, nullptr, PSI,
3485 &ORE);
3486 ICCA.analyze();
3487 OS << " Analyzing call of " << CalledFunction->getName()
3489 ICCA.print(OS);
3490 OS << "\n";
3491 }
3492 }
3493 }
3495}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static cl::opt< int > InlineAsmInstrCost("inline-asm-instr-cost", cl::Hidden, cl::init(0), cl::desc("Cost of a single inline asm instruction when inlining"))
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
Definition InlineCost.cpp:3434
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< bool > InlineAllViableCalls("inline-all-viable-calls", cl::Hidden, cl::init(false), cl::desc("Inline all viable calls, even if they exceed the inlining " "threshold"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
Definition InlineCost.cpp:3083
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function's Machine IR
Machine Check Debug Module
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This pass exposes codegen information to IR-level passes.
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PointerType * getType() const
Overload to return most specific pointer type.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static LLVM_ABI BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getEntryFreq() const
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool onlyReadsMemory(unsigned OpNo) const
Value * getCalledOperand() const
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
LLVM_ABI bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
A cache of ephemeral values within a function.
Type * getReturnType() const
const BasicBlock & getEntryBlock() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
LLVM_ABI void collectAsmStrs(SmallVectorImpl< StringRef > &AsmStrs) const
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
TypeSize getElementOffset(unsigned Idx) const
Analysis pass providing the TargetTransformInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
LLVM_ABI unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
LLVM_ABI unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI int getInliningLastCallToStaticBonus() const
LLVM_ABI unsigned adjustInliningThreshold(const CallBase *CB) const
LLVM_ABI unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
LLVM_ABI int getInlinerVectorBonusPercent() const
LLVM_ABI bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
LLVM_ABI unsigned getInliningThresholdMultiplier() const
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
LLVM_ABI unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
LLVM_ABI InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
LLVM_ABI int getInstrCost()
Definition InlineCost.cpp:206
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
Definition InlineCost.cpp:197
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
Definition InlineCost.cpp:3316
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
generic_gep_type_iterator<> gep_type_iterator
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
Function::ProfileCount ProfileCount
LLVM_ABI std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
Definition InlineCost.cpp:3172
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition InlineCost.cpp:3134
LLVM_ABI std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
Definition InlineCost.cpp:3186
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
DWARFExpression::Operation Op
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition InlineCost.cpp:3428
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Definition InlineCost.cpp:3098
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
Definition InlineCost.cpp:3146
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition InlineCost.cpp:3457
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.