LLVM: lib/Transforms/IPO/SampleProfile.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
81#include
82#include
83#include
84#include
85#include
86#include
87#include
88#include
89#include <system_error>
90#include
91#include
92
93using namespace llvm;
97#define DEBUG_TYPE "sample-profile"
98#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
99
101 "Number of functions inlined with context sensitive profile");
103 "Number of functions not inlined with context sensitive profile");
105 "Number of functions with CFG mismatched profile");
106STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
108 "Number of inlined callsites with a partial distribution factor");
109
111 "Number of functions with FDO inline stopped due to min size limit");
113 "Number of functions with FDO inline stopped due to max size limit");
115 NumCSInlinedHitGrowthLimit,
116 "Number of functions with FDO inline stopped due to growth size limit");
117
118namespace llvm {
119
120
121
125
126
127
128
131 cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
132
135 cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
136 "location for sample profile query."));
139 cl::desc("Salvage unused profile by matching with new "
140 "functions on call graph."));
141
144 cl::desc("Compute and report stale profile statistical metrics."));
145
148 cl::desc("Compute stale profile statistical metrics and write it into the "
149 "native object file(.llvm_stats section)."));
150
153 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
154 "callsite and function as having 0 samples. Otherwise, treat "
155 "un-sampled callsites and functions conservatively as unknown. "));
156
159 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
160 "branches and calls as having 0 samples. Otherwise, treat "
161 "them conservatively as unknown. "));
162
165 cl::desc("For symbols in profile symbol list, regard their profiles to "
166 "be accurate. It may be overridden by profile-sample-accurate. "));
167
170 cl::desc("Merge past inlinee's profile to outline version if sample "
171 "profile loader decided not to inline a call site. It will "
172 "only be enabled when top-down order of profile loading is "
173 "enabled. "));
174
177 cl::desc("Do profile annotation and inlining for functions in top-down "
178 "order of call graph during sample profile loading. It only "
179 "works for new pass manager. "));
180
183 cl::desc("Process functions in a top-down order "
184 "defined by the profiled call graph when "
185 "-sample-profile-top-down-load is on."));
186
189 cl::desc("Inline cold call sites in profile loader if it's beneficial "
190 "for code size."));
191
192
193
194
198 "If true, artificially skip inline transformation in sample-loader "
199 "pass, and merge (or scale) profiles (as configured by "
200 "--sample-profile-merge-inlinee)."));
201
204 cl::desc("Sort profiled recursion by edge weights."));
205
208 cl::desc("The size growth ratio limit for proirity-based sample profile "
209 "loader inlining."));
210
213 cl::desc("The lower bound of size growth limit for "
214 "proirity-based sample profile loader inlining."));
215
218 cl::desc("The upper bound of size growth limit for "
219 "proirity-based sample profile loader inlining."));
220
223 cl::desc("Hot callsite threshold for proirity-based sample profile loader "
224 "inlining."));
225
228 cl::desc("Threshold for inlining cold callsites"));
229}
230
234 "Relative hotness percentage threshold for indirect "
235 "call promotion in proirity-based sample profile loader inlining."));
236
240 "Skip relative hotness check for ICP up to given number of targets."));
241
244 cl::desc("A function is considered hot for staleness error check if its "
245 "total sample count is above the specified percentile"));
246
249 cl::desc("Skip the check if the number of hot functions is smaller than "
250 "the specified number."));
251
254 cl::desc("Reject the profile if the mismatch percent is higher than the "
255 "given number."));
256
258 "sample-profile-prioritized-inline", cl::Hidden,
259 cl::desc("Use call site prioritized inlining for sample profile loader. "
260 "Currently only CSSPGO is supported."));
261
263 "sample-profile-use-preinliner", cl::Hidden,
264 cl::desc("Use the preinliner decisions stored in profile context."));
265
267 "sample-profile-recursive-inline", cl::Hidden,
268 cl::desc("Allow sample loader inliner to inline recursive calls."));
269
272 cl::desc("Remove pseudo-probe after sample profile annotation."));
273
277 "Optimization remarks file containing inline remarks to be replayed "
278 "by inlining from sample profile loader."),
280
282 "sample-profile-inline-replay-scope",
285 "Replay on functions that have remarks associated "
286 "with them (default)"),
288 "Replay on the entire module")),
289 cl::desc("Whether inline replay should be applied to the entire "
290 "Module or just the Functions (default) that are present as "
291 "callers in remarks during sample profile inlining."),
293
295 "sample-profile-inline-replay-fallback",
300 "All decisions not in replay send to original advisor (default)"),
302 "AlwaysInline", "All decisions not in replay are inlined"),
304 "All decisions not in replay are not inlined")),
305 cl::desc("How sample profile inline replay treats sites that don't come "
306 "from the replay. Original: defers to original advisor, "
307 "AlwaysInline: inline all sites not in replay, NeverInline: "
308 "inline no sites not in replay"),
310
312 "sample-profile-inline-replay-format",
317 ":"),
319 "LineDiscriminator", "."),
321 "LineColumnDiscriminator",
322 ":. (default)")),
323 cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
324
327 cl::desc("Max number of promotions for a single indirect "
328 "call callsite in sample profile loader"));
329
332 cl::desc("Ignore existing branch weights on IR and always overwrite."));
333
336 cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
337 "sample-profile inline pass name."));
338
339namespace llvm {
341}
342
343namespace {
344
347using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
349using BlockEdgeMap =
351
352class GUIDToFuncNameMapper {
353public:
357 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
359 return;
360
361 for (const auto &F : CurrentModule) {
363 CurrentGUIDToFuncNameMap.insert(
365
366
367
368
369
370
371
372
374 if (CanonName != OrigName)
375 CurrentGUIDToFuncNameMap.insert(
377 }
378
379
380 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
381 }
382
383 ~GUIDToFuncNameMapper() {
385 return;
386
387 CurrentGUIDToFuncNameMap.clear();
388
389
390
391 SetGUIDToFuncNameMapForAll(nullptr);
392 }
393
394private:
396 std::queue<FunctionSamples *> FSToUpdate;
398 FSToUpdate.push(&IFS.second);
399 }
400
401 while (!FSToUpdate.empty()) {
403 FSToUpdate.pop();
404 FS->GUIDToFuncNameMap = Map;
405 for (const auto &ICS : FS->getCallsiteSamples()) {
407 for (const auto &IFS : FSMap) {
409 FSToUpdate.push(&FS);
410 }
411 }
412 }
413 }
414
416 Module &CurrentModule;
418};
419
420
421struct InlineCandidate {
422 CallBase *CallInstr;
423 const FunctionSamples *CalleeSamples;
424
425
426
427
428 uint64_t CallsiteCount;
429
430
431 float CallsiteDistribution;
432};
433
434
// Comparison functor over InlineCandidate; it is named as the last template
// argument of the CandidateQueue alias declared right after it.
//
// NOTE(review): the embedded line numbering skips 448, 449 and 452, so the
// rest of the comparison (including its final return) is not visible in this
// listing. Only the two checks below can be described with confidence.
435struct CandidateComparer {
436 bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
// Primary key: the call site sample count, compared with '<'.
437 if (LHS.CallsiteCount != RHS.CallsiteCount)
438 return LHS.CallsiteCount < RHS.CallsiteCount;
439
440 const FunctionSamples *LCS = LHS.CalleeSamples;
441 const FunctionSamples *RCS = RHS.CalleeSamples;
442
443
// If either side has no callee profile, the result is LCS converted to bool:
// false when LHS has no samples, true when only RHS is missing them.
444 if (!LCS || !RCS)
445 return LCS;
446
447
450
451
453 }
454};
455
456using CandidateQueue =
458 CandidateComparer>;
459
460
461
462
463
464
466public:
467 SampleProfileLoader(
469 IntrusiveRefCntPtrvfs::FileSystem FS,
470 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
471 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
472 std::function<const TargetLibraryInfo &(Function &)> GetTLI,
473 LazyCallGraph &CG, bool DisableSampleProfileInlining,
474 bool UseFlattenedProfile)
475 : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
477 GetAC(std::move(GetAssumptionCache)),
478 GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
479 CG(CG), LTOPhase(LTOPhase),
484 DisableSampleProfileInlining(DisableSampleProfileInlining),
485 UseFlattenedProfile(UseFlattenedProfile) {}
486
489 ProfileSummaryInfo *_PSI);
490
491protected:
493 bool emitAnnotations(Function &F);
494 ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
495 const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
496 const FunctionSamples *
497 findFunctionSamples(const Instruction &I) const override;
498 std::vector<const FunctionSamples *>
499 findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
500 void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
501 DenseSetGlobalValue::GUID &InlinedGUIDs,
502 uint64_t Threshold);
503
504 bool tryPromoteAndInlineCandidate(
505 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
507
508 bool inlineHotFunctions(Function &F,
509 DenseSetGlobalValue::GUID &InlinedGUIDs);
510 std::optional getExternalInlineAdvisorCost(CallBase &CB);
511 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
512 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
513 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
514 bool
515 tryInlineCandidate(InlineCandidate &Candidate,
517 bool
518 inlineHotFunctionsWithPriority(Function &F,
519 DenseSetGlobalValue::GUID &InlinedGUIDs);
520
521 bool shouldInlineColdCallee(CallBase &CallInst);
522 void emitOptimizationRemarksForInlineCandidates(
523 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
524 bool Hot);
525 void promoteMergeNotInlinedContextSamples(
526 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
527 const Function &F);
528 std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
529 std::unique_ptr buildProfiledCallGraph(Module &M);
530 void generateMDProfMetadata(Function &F);
531 bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,
532 const SampleProfileMap &Profiles);
533 void removePseudoProbeInstsDiscriminator(Module &M);
534
535
536
537
538
539 HashKeyMap<std::unordered_map, FunctionId, Function *> SymbolMap;
540
541
542
543 HashKeyMap<std::unordered_map, FunctionId, FunctionId> FuncNameToProfNameMap;
544
545 std::function<AssumptionCache &(Function &)> GetAC;
546 std::function<TargetTransformInfo &(Function &)> GetTTI;
547 std::function<const TargetLibraryInfo &(Function &)> GetTLI;
548 LazyCallGraph &CG;
549
550
551 std::unique_ptr ContextTracker;
552
553
554
555
556
557
559 const std::string AnnotatedPassName;
560
561
562
563 std::shared_ptr PSL;
564
565
566
567
568 struct NotInlinedProfileInfo {
569 uint64_t entryCount;
570 };
571 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
572
573
574
575 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
576
577
578
579 StringSet<> NamesInProfile;
580
581
582
583
584 llvm::DenseSet<uint64_t> GUIDsInProfile;
585
586
587
588
589
590
591 bool ProfAccForSymsInList;
592
593 bool DisableSampleProfileInlining;
594
595 bool UseFlattenedProfile;
596
597
598 std::unique_ptr ExternalInlineAdvisor;
599
600
601 std::unique_ptr MatchingManager;
602
603private:
// Returns the AnnotatedPassName member as a C string. The remark-emitting
// members of this class pass it as the first constructor argument of the
// OptimizationRemarkAnalysis objects they build.
604 const char *getAnnotatedRemarkPassName() const {
605 return AnnotatedPassName.c_str();
606 }
607};
608}
609
610namespace llvm {
611template <>
612inline bool SampleProfileInference::isExit(const BasicBlock *BB) {
614}
615
616template <>
617inline void SampleProfileInference::findUnlikelyJumps(
618 const std::vector<const BasicBlockT *> &BasicBlocks,
619 BlockEdgeMap &Successors, FlowFunction &Func) {
620 for (auto &Jump : Func.Jumps) {
621 const auto *BB = BasicBlocks[Jump.Source];
622 const auto *Succ = BasicBlocks[Jump.Target];
623 const Instruction *TI = BB->getTerminator();
624
625
626 const auto &Succs = Successors[BB];
627 if (Succs.size() == 2 && Succs.back() == Succ) {
629 Jump.IsUnlikely = true;
630 }
631 }
632 const Instruction *SuccTI = Succ->getTerminator();
633
636 Jump.IsUnlikely = true;
637 }
638 }
639 }
640}
641
642template <>
647
649
652}
653}
654
657 return getProbeWeight(Inst);
658
660 if (!DLoc)
661 return std::error_code();
662
663
664
665
667 return std::error_code();
668
669
670
671
672
673
674
677 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
678 return 0;
679
680 return getInstWeightImpl(Inst);
681}
682
683
684
685
686
687
688
689
690
691
692
693
694
// Looks up the profile samples for the callee of the call instruction Inst.
//
// Returns nullptr when Inst carries no debug location, or when
// findFunctionSamples(Inst) finds no samples for the enclosing context.
// One path returns the result of
// ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName) directly.
//
// NOTE(review): the embedded numbering skips original lines 703, 706 and 713,
// so three pieces of this function are not visible in this listing (marked
// below).
695const FunctionSamples *
696SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
697 const DILocation *DIL = Inst.getDebugLoc();
698 if (!DIL) {
699 return nullptr;
700 }
701
702 StringRef CalleeName;
// NOTE(review): line 703, where `Callee` is introduced, is missing.
// CalleeName stays empty unless that missing statement's branch is taken —
// presumably a null check on the called function; confirm.
704 CalleeName = Callee->getName();
705
// NOTE(review): line 706 is missing; presumably it is the condition guarding
// this ContextTracker-based return — confirm against the full file.
707 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
708
709 const FunctionSamples *FS = findFunctionSamples(Inst);
710 if (FS == nullptr)
711 return nullptr;
712
// NOTE(review): line 713, the start of the final return expression, is
// missing; only its trailing arguments (callee name, the reader's remapper
// and the function-name-to-profile-name map) remain.
714 CalleeName, Reader->getRemapper(),
715 &FuncNameToProfNameMap);
716}
717
718
719
720
721std::vector<const FunctionSamples *>
722SampleProfileLoader::findIndirectCallFunctionSamples(
723 const Instruction &Inst, uint64_t &Sum) const {
724 const DILocation *DIL = Inst.getDebugLoc();
725 std::vector<const FunctionSamples *> R;
726
727 if (!DIL) {
728 return R;
729 }
730
731 auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
732 assert(L && R && "Expect non-null FunctionSamples");
733 if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
734 return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
735 return L->getGUID() < R->getGUID();
736 };
737
739 auto CalleeSamples =
740 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
741 if (CalleeSamples.empty())
742 return R;
743
744
745
746 Sum = 0;
747 for (const auto *const FS : CalleeSamples) {
748 Sum += FS->getHeadSamplesEstimate();
749 R.push_back(FS);
750 }
752 return R;
753 }
754
755 const FunctionSamples *FS = findFunctionSamples(Inst);
756 if (FS == nullptr)
757 return R;
758
760 Sum = 0;
761 if (auto T = FS->findCallTargetMapAt(CallSite))
762 for (const auto &T_C : *T)
763 Sum += T_C.second;
765 if (M->empty())
766 return R;
767 for (const auto &NameFS : *M) {
768 Sum += NameFS.second.getHeadSamplesEstimate();
769 R.push_back(&NameFS.second);
770 }
772 }
773 return R;
774}
775
// Returns the FunctionSamples associated with instruction Inst.
//
// Visible behavior:
//  - Inside a block whose opening condition is not shown, a failed
//    extractProbe(Inst) yields nullptr.
//  - With no debug location on Inst, the member `Samples` is returned as is.
//  - Otherwise the answer is cached per DILocation in DILocation2SampleMap:
//    try_emplace inserts a null placeholder, and the lookup runs only when
//    that insertion actually happened (it.second is true).
//
// NOTE(review): the embedded numbering skips original lines 778 and 790.
776const FunctionSamples *
777SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
// NOTE(review): line 778 is missing; it opens the block closed at 782 and
// presumably restricts the probe check to probe-based profiles — confirm.
779 std::optional Probe = extractProbe(Inst);
780 if (!Probe)
781 return nullptr;
782 }
783
784 const DILocation *DIL = Inst.getDebugLoc();
785 if (!DIL)
786 return Samples;
787
788 auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
789 if (it.second) {
// NOTE(review): line 790 is missing; it holds the `if` that pairs with the
// `else` below and selects between the ContextTracker lookup and the lookup
// through Samples->findFunctionSamples.
791 it.first->second = ContextTracker->getContextSamplesFor(DIL);
792 else
793 it.first->second = Samples->findFunctionSamples(
794 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
795 }
// Cached or freshly computed entry; it can still be null if the lookup
// stored a null result.
796 return it.first->second;
797}
798
799
800
801
802
803
804
805
810
811
812 if (ValueData.empty())
813 return true;
814
815 unsigned NumPromoted = 0;
816 for (const auto &V : ValueData) {
818 continue;
819
820
821
822
824 return false;
825 NumPromoted++;
826
828 return false;
829 }
830 return true;
831}
832
833
834
835
836
837
838static void
842
843
844
845
846
848 return;
849
853
855 if (Sum == 0) {
858 "If sum is 0, assume only one element in CallTargets "
859 "with count being NOMORE_ICP_MAGICNUM");
860
861 for (const auto &V : ValueData)
862 ValueCountMap[V.Value] = V.Count;
863 auto Pair =
865
866
867 if (!Pair.second) {
868 OldSum -= Pair.first->second;
870 }
871 Sum = OldSum;
872 } else {
873
874
875 for (const auto &V : ValueData) {
877 ValueCountMap[V.Value] = V.Count;
878 }
879
880 for (const auto &Data : CallTargets) {
882 if (Pair.second)
883 continue;
884
885
886
887 assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
888 Sum -= Data.Count;
889 }
890 }
891
893 for (const auto &ValueCount : ValueCountMap) {
895 InstrProfValueData{ValueCount.first, ValueCount.second});
896 }
897
899 [](const InstrProfValueData &L, const InstrProfValueData &R) {
900 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);
901 });
902
906 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
907}
908
909
910
911
912
913
914
915
916
917
918
919bool SampleProfileLoader::tryPromoteAndInlineCandidate(
920 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
922
923 if (DisableSampleProfileInlining)
924 return false;
925
926
927
929 return false;
930 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
931 auto R = SymbolMap.find(CalleeFunctionName);
932 if (R == SymbolMap.end() || ->second)
933 return false;
934
935 auto &CI = *Candidate.CallInstr;
937 return false;
938
939 const char *Reason = "Callee function not available";
940
941
942
943
944
945
946 if (->second->isDeclaration() && R->second->getSubprogram() &&
947 R->second->hasFnAttribute("use-sample-profile") &&
949
950
952 Function::getGUIDAssumingExternalLinkage(R->second->getName()),
955
957 CI, R->second, Candidate.CallsiteCount, Sum, false, ORE);
958 if (DI) {
959 Sum -= Candidate.CallsiteCount;
960
961
962
963
964
965
966
967
968
969
970
971
972 Candidate.CallInstr = DI;
974 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
975 if (!Inlined) {
976
977
979 *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
980 }
982 }
983 }
984 } else {
985 LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
987 Candidate.CallInstr->getName())<< " because "
988 << Reason << "\n");
989 }
990 return false;
991}
992
993bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
995 return false;
996
998 if (Callee == nullptr)
999 return false;
1000
1002 GetAC, GetTLI);
1003
1004 if (Cost.isNever())
1005 return false;
1006
1007 if (Cost.isAlways())
1008 return true;
1009
1011}
1012
1013void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1014 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1015 bool Hot) {
1016 for (auto *I : Candidates) {
1017 Function *CalledFunction = I->getCalledFunction();
1018 if (CalledFunction) {
1019 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1020 "InlineAttempt", I->getDebugLoc(),
1021 I->getParent())
1022 << "previous inlining reattempted for "
1023 << (Hot ? "hotness: '" : "size: '")
1024 << ore::NV("Callee", CalledFunction) << "' into '"
1025 << ore::NV("Caller", &F) << "'");
1026 }
1027 }
1028}
1029
1030void SampleProfileLoader::findExternalInlineCandidate(
1031 CallBase *CB, const FunctionSamples *Samples,
1032 DenseSetGlobalValue::GUID &InlinedGUIDs, uint64_t Threshold) {
1033
1034
1035
1036 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1037
1038
1039 if (!Samples) {
1040 InlinedGUIDs.insert(Function::getGUIDAssumingExternalLinkage(
1042 return;
1043 }
1044
1045 Threshold = 0;
1046 }
1047
1048
1049
1050
1051
1052
1053 if (!Samples)
1054 return;
1055
1056
1057
1059
1061 Threshold = 0;
1063 return;
1064 }
1065
1066 ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
1067 std::queue<ContextTrieNode *> CalleeList;
1068 CalleeList.push(Caller);
1069 while (!CalleeList.empty()) {
1070 ContextTrieNode *Node = CalleeList.front();
1071 CalleeList.pop();
1072 FunctionSamples *CalleeSample = Node->getFunctionSamples();
1073
1074
1075
1076 if (!CalleeSample)
1077 continue;
1078
1079
1080 bool PreInline =
1084 continue;
1085
1087
1088 if (!Func || Func->isDeclaration())
1090
1091
1092
1093 for (const auto &BS : CalleeSample->getBodySamples())
1094 for (const auto &TS : BS.second.getCallTargets())
1095 if (TS.second > Threshold) {
1096 const Function *Callee = SymbolMap.lookup(TS.first);
1097 if (!Callee || Callee->isDeclaration())
1098 InlinedGUIDs.insert(TS.first.getHashCode());
1099 }
1100
1101
1102
1103
1104
1105 for (auto &Child : Node->getAllChildContext()) {
1106 ContextTrieNode *CalleeNode = &Child.second;
1107 CalleeList.push(CalleeNode);
1108 }
1109 }
1110}
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134bool SampleProfileLoader::inlineHotFunctions(
1135 Function &F, DenseSetGlobalValue::GUID &InlinedGUIDs) {
1136
1137
1138 assert((!ProfAccForSymsInList ||
1140 .hasFnAttribute("profile-sample-accurate"))) &&
1141 "ProfAccForSymsInList should be false when profile-sample-accurate "
1142 "is enabled");
1143
1144 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1146 bool LocalChanged = true;
1147 while (LocalChanged) {
1148 LocalChanged = false;
1150 for (auto &BB : F) {
1151 bool Hot = false;
1154 for (auto &I : BB) {
1155 const FunctionSamples *FS = nullptr;
1158 if ((FS = findCalleeFunctionSamples(*CB))) {
1160 "GUIDToFuncNameMap has to be populated");
1162 if (FS->getHeadSamplesEstimate() > 0 ||
1164 LocalNotInlinedCallSites.insert({CB, FS});
1165 if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1166 Hot = true;
1167 else if (shouldInlineColdCallee(*CB))
1169 } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1171 }
1172 }
1173 }
1174 }
1175 if (Hot || ExternalInlineAdvisor) {
1177 emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1178 } else {
1180 emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1181 }
1182 }
1183 for (CallBase *I : CIS) {
1184 Function *CalledFunction = I->getCalledFunction();
1185 InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1186 0 ,
1187 1.0 };
1188
1189 if (CalledFunction == &F)
1190 continue;
1191 if (I->isIndirectCall()) {
1192 uint64_t Sum;
1193 for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1194 uint64_t SumOrigin = Sum;
1195 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1196 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1197 PSI->getOrCompHotCountThreshold());
1198 continue;
1199 }
1200 if ((FS, PSI, ProfAccForSymsInList))
1201 continue;
1202
1203 Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
1204 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1205 LocalNotInlinedCallSites.erase(I);
1206 LocalChanged = true;
1207 }
1208 }
1209 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1211 if (tryInlineCandidate(Candidate)) {
1212 LocalNotInlinedCallSites.erase(I);
1213 LocalChanged = true;
1214 }
1215 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1216 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1217 InlinedGUIDs,
1218 PSI->getOrCompHotCountThreshold());
1219 }
1220 }
1221 Changed |= LocalChanged;
1222 }
1223
1224
1225
1227 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1229}
1230
1231bool SampleProfileLoader::tryInlineCandidate(
1233
1234
1235 if (DisableSampleProfileInlining)
1236 return false;
1237
1238 CallBase &CB = *Candidate.CallInstr;
1240 assert(CalledFunction && "Expect a callee with definition");
1243
1244 InlineCost Cost = shouldInlineCandidate(Candidate);
1245 if (Cost.isNever()) {
1246 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1247 "InlineFail", DLoc, BB)
1248 << "incompatible inlining");
1249 return false;
1250 }
1251
1253 return false;
1254
1255 InlineFunctionInfo IFI(GetAC);
1256 IFI.UpdateProfile = false;
1258 true);
1259 if (.isSuccess())
1260 return false;
1261
1262
1264 Cost, true, getAnnotatedRemarkPassName());
1265
1266
1267 if (InlinedCallSites) {
1268 InlinedCallSites->clear();
1270 }
1271
1273 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1274 ++NumCSInlined;
1275
1276
1277
1278
1279
1280
1281
1282
1283 if (Candidate.CallsiteDistribution < 1) {
1284 for (auto &I : IFI.InlinedCallSites) {
1285 if (std::optional Probe = extractProbe(*I))
1287 Candidate.CallsiteDistribution);
1288 }
1289 NumDuplicatedInlinesite++;
1290 }
1291
1292 return true;
1293}
1294
// Builds an InlineCandidate for the call instruction CB.
//
// On success, writes {CB, callee samples, call site count, probe factor}
// into *NewCandidate and returns true. Returns false when the call site has
// neither callee samples nor a positive answer from the external inline
// advisor; in the visible code *NewCandidate is written only on the success
// path. CB must be non-null (asserted).
//
// NOTE(review): the embedded numbering skips original lines 1299 and 1314.
1295bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1296 CallBase *CB) {
1297 assert(CB && "Expect non-null call instruction");
1298
// NOTE(review): line 1299, the condition for this early rejection, is missing.
1300 return false;
1301
1302
1303 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1304
1305
// A call site without samples is still accepted if the external inline
// advisor asks for it; CalleeSamples is then null in the resulting candidate.
1306 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1307 return false;
1308
// The distribution factor defaults to 1.0 and is replaced by the pseudo
// probe's Factor when a probe can be extracted from the call.
1309 float Factor = 1.0;
1310 if (std::optional Probe = extractProbe(*CB))
1311 Factor = Probe->Factor;
1312
// NOTE(review): line 1314, the initializer of CallsiteCount, is missing, so
// how the count is derived cannot be stated from this listing.
1313 uint64_t CallsiteCount =
1315 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1316 return true;
1317}
1318
// Asks ExternalInlineAdvisor, when one is set, for advice on the call CB.
//
// Returns an empty optional when no advisor is set or the advisor returns no
// advice. When advice exists, the function records the outcome on it:
// recordUnattemptedInlining() if inlining is not recommended,
// recordInlining() otherwise.
//
// NOTE(review): the template arguments of std::optional and std::unique_ptr
// are absent in this listing, and the embedded numbering skips original
// lines 1327 and 1330 — presumably the return statements for the
// "not recommended" and "recommended" cases; confirm against the full file.
1319std::optional
1320SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1321 std::unique_ptr Advice = nullptr;
1322 if (ExternalInlineAdvisor) {
1323 Advice = ExternalInlineAdvisor->getAdvice(CB);
1324 if (Advice) {
1325 if (!Advice->isInliningRecommended()) {
1326 Advice->recordUnattemptedInlining();
// NOTE(review): line 1327 is missing here.
1328 }
1329 Advice->recordInlining();
// NOTE(review): line 1330 is missing here.
1331 }
1332 }
1333
// No advisor, or no advice for this call site.
1334 return {};
1335}
1336
1337bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1338 std::optional Cost = getExternalInlineAdvisorCost(CB);
1339 return Cost ? !!*Cost : false;
1340}
1341
1342InlineCost
1343SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1344 if (std::optional ReplayCost =
1345 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1346 return *ReplayCost;
1347
1348
1351 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1355 }
1356
1358 assert(Callee && "Expect a definition for inline candidate of direct call");
1359
1361
1364
1365
1366
1367
1368
1369
1370 InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1371 GetTTI(*Callee), GetAC, GetTLI);
1372
1373
1374 if (Cost.isNever() || Cost.isAlways())
1375 return Cost;
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1387
1388
1389
1394 }
1395
1396
1397
1398
1401 }
1402
1403
1404
1406}
1407
1408bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1409 Function &F, DenseSetGlobalValue::GUID &InlinedGUIDs) {
1410
1411
1412 assert((!ProfAccForSymsInList ||
1414 .hasFnAttribute("profile-sample-accurate"))) &&
1415 "ProfAccForSymsInList should be false when profile-sample-accurate "
1416 "is enabled");
1417
1418
1419
1420 CandidateQueue CQueue;
1421 InlineCandidate NewCandidate;
1422 for (auto &BB : F) {
1423 for (auto &I : BB) {
1425 if (!CB)
1426 continue;
1427 if (getInlineCandidate(&NewCandidate, CB))
1428 CQueue.push(NewCandidate);
1429 }
1430 }
1431
1432
1433
1434
1435
1437 "Max inline size limit should not be smaller than min inline size "
1438 "limit.");
1442 if (ExternalInlineAdvisor)
1443 SizeLimit = std::numeric_limits::max();
1444
1445 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1446
1447
1449 while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1450 InlineCandidate Candidate = CQueue.top();
1451 CQueue.pop();
1452 CallBase *I = Candidate.CallInstr;
1453 Function *CalledFunction = I->getCalledFunction();
1454
1455 if (CalledFunction == &F)
1456 continue;
1457 if (I->isIndirectCall()) {
1458 uint64_t Sum = 0;
1459 auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
1460 uint64_t SumOrigin = Sum;
1461 Sum *= Candidate.CallsiteDistribution;
1462 unsigned ICPCount = 0;
1463 for (const auto *FS : CalleeSamples) {
1464
1465 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1466 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1467 PSI->getOrCompHotCountThreshold());
1468 continue;
1469 }
1470 uint64_t EntryCountDistributed =
1471 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1472
1473
1474
1475
1478 break;
1479
1480
1481
1482
1483
1484
1485
1486 if (!PSI->isHotCount(EntryCountDistributed))
1487 break;
1489
1490
1491 Candidate = {I, FS, EntryCountDistributed,
1492 Candidate.CallsiteDistribution};
1493 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1494 &InlinedCallSites)) {
1495 for (auto *CB : InlinedCallSites) {
1496 if (getInlineCandidate(&NewCandidate, CB))
1497 CQueue.emplace(NewCandidate);
1498 }
1499 ICPCount++;
1501 } else if (!ContextTracker) {
1502 LocalNotInlinedCallSites.insert({I, FS});
1503 }
1504 }
1505 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1508 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1509 for (auto *CB : InlinedCallSites) {
1510 if (getInlineCandidate(&NewCandidate, CB))
1511 CQueue.emplace(NewCandidate);
1512 }
1514 } else if (!ContextTracker) {
1515 LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
1516 }
1517 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1518 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1519 InlinedGUIDs,
1520 PSI->getOrCompHotCountThreshold());
1521 }
1522 }
1523
1524 if (!CQueue.empty()) {
1526 ++NumCSInlinedHitMaxLimit;
1528 ++NumCSInlinedHitMinLimit;
1529 else
1530 ++NumCSInlinedHitGrowthLimit;
1531 }
1532
1533
1534
1536 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1538}
1539
1540void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1541 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
1542 const Function &F) {
1543
1544 for (const auto &Pair : NonInlinedCallSites) {
1545 CallBase *I = Pair.first;
1547 if (!Callee || Callee->isDeclaration())
1548 continue;
1549
1550 ORE->emit(
1551 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1552 I->getDebugLoc(), I->getParent())
1553 << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1554 << "' into '" << ore::NV("Caller", &F) << "'");
1555
1556 ++NumCSNotInlined;
1557 const FunctionSamples *FS = Pair.second;
1558 if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
1559 continue;
1560 }
1561
1562
1564 continue;
1565
1567
1568
1569
1570
1571
1572 if (FS->getHeadSamples() == 0) {
1573
1574
1575 const_cast<FunctionSamples *>(FS)->addHeadSamples(
1576 FS->getHeadSamplesEstimate());
1577
1578
1579
1580
1581 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1582
1583
1584 if (!OutlineFS)
1585 OutlineFS = &OutlineFunctionSamples[
1587 OutlineFS->merge(*FS, 1);
1588
1590 }
1591 } else {
1592 auto pair =
1593 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1594 pair.first->second.entryCount += FS->getHeadSamplesEstimate();
1595 }
1596 }
1597}
1598
1599
1604 R.emplace_back(
1605 InstrProfValueData{I.first.getHashCode(), I.second});
1606 }
1607 return R;
1608}
1609
1610
1611
// Sets or clears MD_prof metadata on the instructions of F, using the
// BlockWeights and EdgeWeights maps.
//
// NOTE(review): this listing is an extraction with a number of source lines
// missing (conditions, declarations of BB/CallSite/TI/BranchLoc, ...). The
// comments below describe only what the visible statements do.
//
// Visible steps, per basic block:
//  - if BlockWeights[BB] is non-zero, walk the block's instructions; for an
//    instruction with a debug location and function samples, sum the counts
//    of the call targets found at its call site (plus head-sample estimates
//    of function samples found at that call site);
//  - read one weight per successor edge from EdgeWeights, clamp it to the
//    uint32_t range and collect it in Weights;
//  - remember the largest non-zero edge weight and an instruction in its
//    destination block, which the optimization remark refers to.
1612void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1613
1614
1615 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1616 LLVMContext &Ctx = F.getContext();
1617 MDBuilder MDB(Ctx);
1618 for (auto &BI : F) {
1620
// Call-site annotation is only attempted for blocks with a non-zero weight.
1621 if (BlockWeights[BB]) {
1622 for (auto &I : *BB) {
1624 continue;
1626 const DebugLoc &DLoc = I.getDebugLoc();
// Instructions without a debug location are skipped.
1627 if (!DLoc)
1628 continue;
1629 const DILocation *DIL = DLoc;
1630 const FunctionSamples *FS = findFunctionSamples(I);
// No function samples for this instruction: nothing to annotate.
1631 if (!FS)
1632 continue;
1634 ErrorOrSampleRecord::CallTargetMap T =
1635 FS->findCallTargetMapAt(CallSite);
1636 if ( || T.get().empty())
1637 continue;
1639
1640
1641
1642 if (std::optional Probe = extractProbe(I)) {
1643 if (Probe->Factor < 1)
1645 }
1646 }
// Total the counts of every call target recorded at this call site.
1649 uint64_t Sum = 0;
1650 for (const auto &C : T.get())
1651 Sum += C.second;
1652
1653
1654
1657 FS->findFunctionSamplesMapAt(CallSite)) {
// Add the head-sample estimate of each function-samples entry found at
// the same call site.
1658 for (const auto &NameFS : *M)
1659 Sum += NameFS.second.getHeadSamplesEstimate();
1660 }
1661 }
1662 if (Sum)
1665 I.setMetadata(LLVMContext::MD_prof, nullptr);
1668 I, ArrayRef<uint32_t>{static_cast<uint32_t>(BlockWeights[BB])},
1669 false);
1670 }
1671 }
1673
1674
1675 for (auto &I : *BB) {
1678 I.setMetadata(LLVMContext::MD_prof, nullptr);
1679 } else {
1681 false);
1682 }
1683 }
1684 }
1685 }
1686
1689 continue;
1692 continue;
1693
1695 LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1696 << ((BranchLoc) ? Twine(BranchLoc.getLine())
1697 : Twine(""))
1698 << ".\n");
1699 SmallVector<uint32_t, 4> Weights;
1700 uint32_t MaxWeight = 0;
1702
1703
1704
// EdgeMultiplicity is incremented once per visited successor entry;
// EdgeIndex[I] records the running count seen at successor index I.
1705 DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1706 std::vector<uint64_t> EdgeIndex;
1711 EdgeIndex[I] = EdgeMultiplicity[Succ];
1712 EdgeMultiplicity[Succ]++;
1713 }
1714 }
1717 Edge E = std::make_pair(BB, Succ);
1718 uint64_t Weight = EdgeWeights[E];
1720
1721
1722
// Clamp the 64-bit edge weight to UINT32_MAX before it is narrowed.
1723 if (Weight > std::numeric_limits<uint32_t>::max()) {
1724 LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");
1725 Weight = std::numeric_limits<uint32_t>::max();
1726 }
1728
1729
// Push Weight + 1, except when Weight already equals UINT32_MAX, where the
// increment would wrap around.
1730 Weights.push_back(static_cast<uint32_t>(
1731 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1732 : Weight + 1));
1733 } else {
1734
1735
// Split the weight across EdgeMultiplicity[Succ] entries; entries whose
// EdgeIndex is below the remainder receive one extra unit.
1736 uint64_t W = Weight / EdgeMultiplicity[Succ];
1737
1738 if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1739 W++;
1740 Weights.push_back(static_cast<uint32_t>(W));
1741 }
// Track the largest non-zero weight and an instruction of its destination.
1742 if (Weight != 0) {
1743 if (Weight > MaxWeight) {
1744 MaxWeight = Weight;
1745 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();
1746 }
1747 }
1748 }
1749
1751
1752 uint64_t TempWeight;
1753
1754
1755
1756
1757
1758
1759
1760
1761 if (MaxWeight > 0 &&
1763 LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1765 ORE->emit([&]() {
1766 return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1767 << "most popular destination for conditional branches at "
1768 << ore::NV("CondBranchesLoc", BranchLoc);
1769 });
1770 } else {
1772 TI->setMetadata(LLVMContext::MD_prof, nullptr);
1773 LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1774 } else {
1775 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1776 }
1777 }
1778 }
1779}
1780
1781
1782
1783
1784
1785
1786
// Annotates F using the samples held in Samples.
//
// Visible steps: a validity check (probe-manager based in one branch, a
// getFunctionLoc(F) check in the other), sample-driven inlining, weight
// propagation via computeAndPropagateWeights, then generateMDProfMetadata
// and emitCoverageRemarks.
//
// NOTE(review): several lines are missing from this listing, including the
// declaration of Changed, the conditions selecting each branch and the final
// return statement -- confirm against the full source.
1787bool SampleProfileLoader::emitAnnotations(Function &F) {
1789
1792 if (!ProbeManager->getDesc(F))
1793 dbgs() << "Probe descriptor missing for Function " << F.getName()
1794 << "\n";
1795 });
1796
// Count matched vs. mismatched profiles; the mismatch branch contains an
// early `return false` whose guarding condition is not visible here.
1797 if (ProbeManager->profileIsValid(F, *Samples)) {
1798 ++NumMatchedProfile;
1799 } else {
1800 ++NumMismatchedProfile;
1802 dbgs() << "Profile is invalid due to CFG mismatch for Function "
1803 << F.getName() << "\n");
1805 return false;
1806 }
1807 } else {
// Give up when getFunctionLoc(F) reports 0.
1808 if (getFunctionLoc(F) == 0)
1809 return false;
1810
1811 LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1812 << F.getName() << ": " << getFunctionLoc(F) << "\n");
1813 }
1814
// GUID set filled by the inlining step and handed to weight propagation.
1815 DenseSetGlobalValue::GUID InlinedGUIDs;
1817 Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1818 else
1819 Changed |= inlineHotFunctions(F, InlinedGUIDs);
1820
1821 Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1822
1824 generateMDProfMetadata(F);
1825
1826 emitCoverageRemarks(F);
1828}
1829
// Builds and returns the profiled call graph for module M.
//
// The graph is constructed either from *ContextTracker or from the profiles
// returned by Reader->getProfiles(); the condition choosing between the two
// is not visible in this listing. Afterwards addProfiledFunction is called
// for functions of M (the skip condition and the call's argument are also
// missing here -- confirm against the full source).
1830std::unique_ptr
1831SampleProfileLoader::buildProfiledCallGraph(Module &M) {
1832 std::unique_ptr ProfiledCG;
1834 ProfiledCG = std::make_unique(*ContextTracker);
1835 else
1836 ProfiledCG = std::make_unique(Reader->getProfiles());
1837
1838
1839
1840
1841 for (Function &F : M) {
1843 continue;
1844 ProfiledCG->addProfiledFunction(
1846 }
1847
1848 return ProfiledCG;
1849}
1850
// Returns the list of functions of M in the order they should be processed.
//
// Visible paths:
//  - an early return with functions pushed in module iteration order (the
//    filter applied to each function is not visible);
//  - a walk over the SCCs of the profiled call graph, pushing each node's
//    function and reversing the whole list at the end.
//
// NOTE(review): the conditions selecting each path, and the final `else`
// branch, are missing from this listing. The warning text suggests the
// profiled-call-graph path belongs to top-down loading -- confirm.
1851std::vector<Function *>
1852SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
1853 std::vector<Function *> FunctionOrderList;
1854 FunctionOrderList.reserve(M.size());
1855
1857 errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1858 "together with -sample-profile-top-down-load.\n";
1859
1862
1863
1864
1865
1866
1868 }
1869
// Plain module order; returned immediately.
1870 for (Function &F : M)
1872 FunctionOrderList.push_back(&F);
1873 return FunctionOrderList;
1874 }
1875
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
// Iterate the SCCs of the profiled call graph until the iterator is at end.
1926 std::unique_ptr ProfiledCG = buildProfiledCallGraph(M);
1927 scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1928 while (!CGI.isAtEnd()) {
1929 auto Range = *CGI;
1931
1932 scc_member_iterator<ProfiledCallGraph *> SI(*CGI);
1934 }
1935 for (auto *Node : Range) {
1938 FunctionOrderList.push_back(F);
1939 }
1940 ++CGI;
1941 }
// Functions were appended in SCC iteration order; flip the whole list.
1942 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1943 } else
1945
1947 dbgs() << "Function processing order:\n";
1948 for (auto F : FunctionOrderList) {
1949 dbgs() << F->getName() << "\n";
1950 }
1951 });
1952
1953 return FunctionOrderList;
1954}
1955
// Creates and reads the sample profile reader for module M and sets up the
// loader state derived from it (profile symbol list, name sets, context
// tracker, probe manager, matching manager).
//
// Returns false after emitting a diagnostic when the reader cannot be
// created, when reading the profile fails, or when a probe-based profile is
// used on a module that is not probed; returns true otherwise.
//
// NOTE(review): the second signature line and many statements/conditions are
// missing from this listing; only visible statements are described.
1956bool SampleProfileLoader::doInitialization(Module &M,
1958 auto &Ctx = M.getContext();
1959
1961 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
// Reader creation failed: report it through the context and abort.
1962 if (std::error_code EC = ReaderOrErr.getError()) {
1963 std::string Msg = "Could not open profile: " + EC.message();
1964 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1965 return false;
1966 }
1967 Reader = std::move(ReaderOrErr.get());
1968 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1969
1970
1971 Reader->setModule(&M);
// Reading the profile failed: report it and abort.
1972 if (std::error_code EC = Reader->read()) {
1973 std::string Msg = "profile reading failed: " + EC.message();
1974 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1975 return false;
1976 }
1977
1978 PSL = Reader->getProfileSymbolList();
1979
1982
1983 if (UseFlattenedProfile)
1985 Reader->profileIsCS());
1986
1987
1988 ProfAccForSymsInList =
// Rebuild the lookup sets from the reader's name table: hash codes go into
// GUIDsInProfile, string names into NamesInProfile (the condition choosing
// between the two loops is not visible here).
1990 if (ProfAccForSymsInList) {
1991 NamesInProfile.clear();
1992 GUIDsInProfile.clear();
1993 if (auto NameTable = Reader->getNameTable()) {
1995 for (auto Name : *NameTable)
1996 GUIDsInProfile.insert(Name.getHashCode());
1997 } else {
1998 for (auto Name : *NameTable)
1999 NamesInProfile.insert(Name.stringRef());
2000 }
2001 }
2002 CoverageTracker.setProfAccForSymsInList(true);
2003 }
2004
2007 M, *FAM, Ctx, nullptr,
2012 false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2013 }
2014
2015
// Profile-kind specific setup; the bodies of these branches are missing
// from this listing.
2016 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2017 Reader->profileIsProbeBased()) {
2024
2029
2032
2033 if (Reader->profileIsPreInlined()) {
2036 }
2037
2038
2039
2040
2041
2042
2043 if (Reader->profileIsProbeBased()) {
2048 }
2049
2050 if (!Reader->profileIsCS()) {
2051
2052
2053
2054
2059 }
2060 }
2061
// A context tracker is created only for context-sensitive profiles.
2062 if (Reader->profileIsCS()) {
2063
2064 ContextTracker = std::make_unique(
2065 Reader->getProfiles(), &GUIDToFuncNameMap);
2066 }
2067
2068
// Probe-based profiles need a probed module; otherwise diagnose and fail.
2069 if (Reader->profileIsProbeBased()) {
2070 ProbeManager = std::make_unique(M);
2071 if (!ProbeManager->moduleIsProbed(M)) {
2072 const char *Msg =
2073 "Pseudo-probe-based profile requires SampleProfileProbePass";
2074 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2076 return false;
2077 }
2078 }
2079
2082 MatchingManager = std::make_unique(
2083 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2084 FuncNameToProfNameMap);
2085 }
2086
2087 return true;
2088}
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
// Decides whether the profile should be rejected because too many functions
// have a mismatched probe hash.
//
// Walks Profiles, counting the functions that pass the visible filters
// (TotalHotFunc) and those among them for which
// ProbeManager->profileIsHashMismatched() is true (NumMismatchedFunc).
// Returns true, after emitting a diagnostic on M's context, when the
// percentage test below succeeds; returns false otherwise.
//
// NOTE(review): the hotness filter, the early-exit condition and the
// right-hand side of the percentage comparison are missing from this listing.
2100bool SampleProfileLoader::rejectHighStalenessProfile(
2101 Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
2103 "Only support for probe-based profile");
2104 uint64_t TotalHotFunc = 0;
2105 uint64_t NumMismatchedFunc = 0;
2106 for (const auto &I : Profiles) {
2107 const auto &FS = I.second;
2108 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
// Profiles without a probe descriptor are not counted.
2109 if (!FuncDesc)
2110 continue;
2111
2112
2114 FS.getTotalSamples()))
2115 continue;
2116
2117 TotalHotFunc++;
2118 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2119 NumMismatchedFunc++;
2120 }
2121
2122
2124 return false;
2125
2126
// Integer percentage test: NumMismatchedFunc * 100 is compared against an
// expression that is not visible here.
2127 if (NumMismatchedFunc * 100 >=
2129 auto &Ctx = M.getContext();
2130 const char *Msg =
2131 "The input profile significantly mismatches current source code. "
2132 "Please recollect profile to avoid performance regression.";
2133 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
2134 return true;
2135 }
2136 return false;
2137}
2138
// For every function in M: collects some instructions into InstsToDel and
// erases them once the function has been fully traversed; for instructions
// with a debug location, reads the discriminator and updates the debug
// location.
//
// NOTE(review): the condition selecting instructions for deletion, the
// discriminator check and the arguments of setDebugLoc are missing from this
// listing. Judging by the function name they concern pseudo-probe
// instructions and pseudo-probe discriminators -- confirm.
2139void SampleProfileLoader::removePseudoProbeInstsDiscriminator(Module &M) {
2140 for (auto &F : M) {
2141 std::vector<Instruction *> InstsToDel;
2142 for (auto &BB : F) {
2143 for (auto &I : BB) {
2145 InstsToDel.push_back(&I);
2147 if (const DILocation *DIL = I.getDebugLoc().get()) {
2148
2149 unsigned Discriminator = DIL->getDiscriminator();
2151 std::optional<uint32_t> DwarfDiscriminator =
2153 Discriminator);
2154 I.setDebugLoc(
2156 }
2157 }
2158 }
2159 }
// Erasure is deferred until after the walk over the function's blocks.
2160 for (auto *I : InstsToDel)
2161 I->eraseFromParent();
2162 }
2163}
2164
2166 ProfileSummaryInfo *_PSI) {
// NOTE(review): the first line of this function's signature is missing from
// this listing (presumably SampleProfileLoader::runOnModule -- confirm), as
// are several statements below.
//
// Visible steps: install the profile summary on M if none is present, bail
// out (return false) when rejectHighStalenessProfile() fires, populate
// SymbolMap from M's value symbol table, run the matching manager, then
// iterate buildFunctionOrder(M, CG) and return retval.
2167 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2168
2169 PSI = _PSI;
2170 if (M.getProfileSummary( false) == nullptr) {
2171 M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2174 }
2175
2177 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2178 return false;
2179
2180 auto Remapper = Reader->getRemapper();
2181
2182 for (const auto &N_F : M.getValueSymbolTable()) {
2183 StringRef OrigName = N_F.getKey();
2185 if (F == nullptr || OrigName.empty())
2186 continue;
// Index the function under its symbol-table name.
2187 SymbolMap[FunctionId(OrigName)] = F;
2189 if (OrigName != NewName && !NewName.empty()) {
2190 auto r = SymbolMap.emplace(FunctionId(NewName), F);
2191
2192
2193
2194
// emplace did not insert, i.e. NewName was already a key: the existing entry
// is overwritten with nullptr (presumably to mark it ambiguous -- confirm).
2195 if (!r.second)
2196 r.first->second = nullptr;
2197 OrigName = NewName;
2198 }
2199
// Also register the name the remapper reports for the profile, when it is
// non-empty and differs from OrigName.
2200 if (Remapper) {
2201 if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2202 if (*MapName != OrigName && !MapName->empty())
2203 SymbolMap.emplace(FunctionId(*MapName), F);
2204 }
2205 }
2206 }
2207
2208
2211 MatchingManager->runOnModule();
2212 MatchingManager->clearMatchingData();
2213 }
2214 assert(SymbolMap.count(FunctionId()) == 0 &&
2215 "No empty StringRef should be added in SymbolMap");
2217 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2218 "not enabled");
2219
2220 bool retval = false;
2221 for (auto *F : buildFunctionOrder(M, CG)) {
2222 assert(->isDeclaration());
2223 clearFunctionData();
2225 }
2226
2227
2229 for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2230 notInlinedCallInfo)
2232
2235 removePseudoProbeInstsDiscriminator(M);
2237 M.eraseNamedMetadata(FuncInfo);
2238 }
2239
2240 return retval;
2241}
2242
// Processes a single function: picks an initial entry count, locates the
// samples for F and, when non-empty samples exist, returns the result of
// emitAnnotations(F); otherwise returns false.
//
// NOTE(review): the second signature line and a number of conditions are
// missing from this listing; only visible statements are described.
2243bool SampleProfileLoader::runOnFunction(Function &F,
2245 LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2246 DILocation2SampleMap.clear();
2247
2248
2249
2250
// -1 converts to the maximum uint64_t value; the variable is assigned 0 or
// -1 again further down.
2251 uint64_t initialEntryCount = -1;
2252
2255
2256
2257 initialEntryCount = 0;
2258
2259
2260 ProfAccForSymsInList = false;
2261 }
2262 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2263
2264
2265
2266
2267
2268 if (ProfAccForSymsInList) {
2269
// Functions named in the profile symbol list start with an entry count of 0.
2270 if (PSL->contains(F.getName()))
2271 initialEntryCount = 0;
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2285 GUIDsInProfile.count(
2286 Function::getGUIDAssumingExternalLinkage(CanonName))) ||
2288 initialEntryCount = -1;
2289 }
2290
2291
2292
2293 if (.getEntryCount())
2296 .getManager();
2298
2300 Samples = ContextTracker->getBaseSamplesFor(F);
2301 else {
2302 Samples = Reader->getSamplesFor(F);
2303
2304
// Nothing from the reader: fall back to OutlineFunctionSamples, first by
// CanonName, then by the name the remapper reports for it.
2305 if (!Samples) {
2307 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2308 if (It != OutlineFunctionSamples.end()) {
2309 Samples = &It->second;
2310 } else if (auto Remapper = Reader->getRemapper()) {
2311 if (auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2312 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2313 if (It != OutlineFunctionSamples.end())
2314 Samples = &It->second;
2315 }
2316 }
2317 }
2318 }
2319
// Annotate only when non-empty samples were found.
2320 if (Samples && !Samples->empty())
2321 return emitAnnotations(F);
2322 return false;
2323}
2325 std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
2327 bool UseFlattenedProfile)
// NOTE(review): the first line of this constructor's signature and one
// parameter line are missing from this listing (presumably the
// SampleProfileLoaderPass constructor -- confirm).
// The initializer list copies each argument into the member of the matching
// role; FS is moved in, and the body is empty.
2328 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2329 LTOPhase(LTOPhase), FS(std::move(FS)),
2330 DisableSampleProfileInlining(DisableSampleProfileInlining),
2331 UseFlattenedProfile(UseFlattenedProfile) {}
2332
// NOTE(review): the header and most statements of this function are missing
// from this listing (presumably SampleProfileLoaderPass::run -- confirm).
// Visible steps: three blocks each closed by `};`, a check on FS, then
// construction of a SampleProfileLoader. Its doInitialization(M, &FAM) and
// runOnModule(M, AM, PSI) results are each tested; the statements executed
// on failure and the final return are not visible.
2337
2340 };
2343 };
2346 };
2347
2348 if (!FS)
2351
2352 SampleProfileLoader SampleLoader(
2355 : ProfileRemappingFileName,
2356 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2357 DisableSampleProfileInlining, UseFlattenedProfile);
2358 if (!SampleLoader.doInitialization(M, &FAM))
2360
2362 if (!SampleLoader.runOnModule(M, AM, PSI))
2364
2366}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static LVReader * CurrentReader
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition SampleProfile.cpp:839
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition SampleProfile.cpp:806
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition SampleProfile.cpp:1601
#define CSINLINE_DEBUG
Definition SampleProfile.cpp:98
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", ""), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", ":"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "."), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", ":. (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This pass exposes codegen information to IR-level passes.
Defines the virtual file system interface vfs::FileSystem.
bool empty() const
Returns true if the analysis manager has an empty results cache.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
LLVM_ABI unsigned getLine() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
void computeDominanceAndLoopInfo(FunctionT &F)
PostDominatorTreePtrT PDT
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition SampleProfile.cpp:2333
LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
Definition SampleProfile.cpp:2324
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
std::pair< typename Base::iterator, bool > insert(StringRef key)
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
static LLVM_ABI bool ProfileIsCS
FunctionId getFunction() const
Return the function name.
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
void setContextSynthetic()
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
Sample-based profile reader.
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot wrt to hot cutoff threshold.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
LLVM_ABI cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
cl::opt< bool > SampleProfileUseProfi
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
FunctionAddr VTableAddr Count
Function::ProfileCount ProfileCount
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
static bool skipProfileForFunction(const Function &F)
LLVM_ABI cl::opt< bool > SortProfiledSCC
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite"))
LLVM_ABI cl::opt< int > ProfileInlineLimitMax
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
cl::opt< bool > EnableExtTspBlockPlacement
Definition SampleProfile.cpp:340
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
constexpr const char * PseudoProbeDescMetadataName
Implement std::hash so that hash_code can be used in STL containers.
@ LineColumnDiscriminator
A wrapper of binary function with basic blocks and jumps.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)