LLVM: lib/Transforms/IPO/SampleProfile.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

81#include

82#include

83#include

84#include

85#include

86#include

87#include

88#include

89#include <system_error>

90#include

91#include

92

93using namespace llvm;

97#define DEBUG_TYPE "sample-profile"

98#define CSINLINE_DEBUG DEBUG_TYPE "-inline"

99

101 "Number of functions inlined with context sensitive profile");

103 "Number of functions not inlined with context sensitive profile");

105 "Number of functions with CFG mismatched profile");

106STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");

108 "Number of inlined callsites with a partial distribution factor");

109

111 "Number of functions with FDO inline stopped due to min size limit");

113 "Number of functions with FDO inline stopped due to max size limit");

115 NumCSInlinedHitGrowthLimit,

116 "Number of functions with FDO inline stopped due to growth size limit");

117

118namespace llvm {

119

120

121

125

126

127

128

131 cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);

132

135 cl::desc("Salvage stale profile by fuzzy matching and use the remapped "

136 "location for sample profile query."));

139 cl::desc("Salvage unused profile by matching with new "

140 "functions on call graph."));

141

144 cl::desc("Compute and report stale profile statistical metrics."));

145

148 cl::desc("Compute stale profile statistical metrics and write it into the "

149 "native object file(.llvm_stats section)."));

150

153 cl::desc("If the sample profile is accurate, we will mark all un-sampled "

154 "callsite and function as having 0 samples. Otherwise, treat "

155 "un-sampled callsites and functions conservatively as unknown. "));

156

159 cl::desc("If the sample profile is accurate, we will mark all un-sampled "

160 "branches and calls as having 0 samples. Otherwise, treat "

161 "them conservatively as unknown. "));

162

165 cl::desc("For symbols in profile symbol list, regard their profiles to "

166 "be accurate. It may be overridden by profile-sample-accurate. "));

167

170 cl::desc("Merge past inlinee's profile to outline version if sample "

171 "profile loader decided not to inline a call site. It will "

172 "only be enabled when top-down order of profile loading is "

173 "enabled. "));

174

177 cl::desc("Do profile annotation and inlining for functions in top-down "

178 "order of call graph during sample profile loading. It only "

179 "works for new pass manager. "));

180

183 cl::desc("Process functions in a top-down order "

184 "defined by the profiled call graph when "

185 "-sample-profile-top-down-load is on."));

186

189 cl::desc("Inline cold call sites in profile loader if it's beneficial "

190 "for code size."));

191

192

193

194

198 "If true, artificially skip inline transformation in sample-loader "

199 "pass, and merge (or scale) profiles (as configured by "

200 "--sample-profile-merge-inlinee)."));

201

204 cl::desc("Sort profiled recursion by edge weights."));

205

208 cl::desc("The size growth ratio limit for proirity-based sample profile "

209 "loader inlining."));

210

213 cl::desc("The lower bound of size growth limit for "

214 "proirity-based sample profile loader inlining."));

215

218 cl::desc("The upper bound of size growth limit for "

219 "proirity-based sample profile loader inlining."));

220

223 cl::desc("Hot callsite threshold for proirity-based sample profile loader "

224 "inlining."));

225

228 cl::desc("Threshold for inlining cold callsites"));

229}

230

234 "Relative hotness percentage threshold for indirect "

235 "call promotion in proirity-based sample profile loader inlining."));

236

240 "Skip relative hotness check for ICP up to given number of targets."));

241

244 cl::desc("A function is considered hot for staleness error check if its "

245 "total sample count is above the specified percentile"));

246

249 cl::desc("Skip the check if the number of hot functions is smaller than "

250 "the specified number."));

251

254 cl::desc("Reject the profile if the mismatch percent is higher than the "

255 "given number."));

256

258 "sample-profile-prioritized-inline", cl::Hidden,

259 cl::desc("Use call site prioritized inlining for sample profile loader. "

260 "Currently only CSSPGO is supported."));

261

263 "sample-profile-use-preinliner", cl::Hidden,

264 cl::desc("Use the preinliner decisions stored in profile context."));

265

267 "sample-profile-recursive-inline", cl::Hidden,

268 cl::desc("Allow sample loader inliner to inline recursive calls."));

269

272 cl::desc("Remove pseudo-probe after sample profile annotation."));

273

277 "Optimization remarks file containing inline remarks to be replayed "

278 "by inlining from sample profile loader."),

280

282 "sample-profile-inline-replay-scope",

285 "Replay on functions that have remarks associated "

286 "with them (default)"),

288 "Replay on the entire module")),

289 cl::desc("Whether inline replay should be applied to the entire "

290 "Module or just the Functions (default) that are present as "

291 "callers in remarks during sample profile inlining."),

293

295 "sample-profile-inline-replay-fallback",

300 "All decisions not in replay send to original advisor (default)"),

302 "AlwaysInline", "All decisions not in replay are inlined"),

304 "All decisions not in replay are not inlined")),

305 cl::desc("How sample profile inline replay treats sites that don't come "

306 "from the replay. Original: defers to original advisor, "

307 "AlwaysInline: inline all sites not in replay, NeverInline: "

308 "inline no sites not in replay"),

310

312 "sample-profile-inline-replay-format",

317 ":"),

319 "LineDiscriminator", "."),

321 "LineColumnDiscriminator",

322 ":. (default)")),

323 cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);

324

327 cl::desc("Max number of promotions for a single indirect "

328 "call callsite in sample profile loader"));

329

332 cl::desc("Ignore existing branch weights on IR and always overwrite."));

333

336 cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "

337 "sample-profile inline pass name."));

338

339namespace llvm {

341}

342

343namespace {

344

347using Edge = std::pair<const BasicBlock *, const BasicBlock *>;

349using BlockEdgeMap =

351

352class GUIDToFuncNameMapper {

353public:

357 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {

359 return;

360

361 for (const auto &F : CurrentModule) {

363 CurrentGUIDToFuncNameMap.insert(

365

366

367

368

369

370

371

372

374 if (CanonName != OrigName)

375 CurrentGUIDToFuncNameMap.insert(

377 }

378

379

380 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);

381 }

382

383 ~GUIDToFuncNameMapper() {

385 return;

386

387 CurrentGUIDToFuncNameMap.clear();

388

389

390

391 SetGUIDToFuncNameMapForAll(nullptr);

392 }

393

394private:

396 std::queue<FunctionSamples *> FSToUpdate;

398 FSToUpdate.push(&IFS.second);

399 }

400

401 while (!FSToUpdate.empty()) {

403 FSToUpdate.pop();

404 FS->GUIDToFuncNameMap = Map;

405 for (const auto &ICS : FS->getCallsiteSamples()) {

407 for (const auto &IFS : FSMap) {

409 FSToUpdate.push(&FS);

410 }

411 }

412 }

413 }

414

416 Module &CurrentModule;

418};

419

420

421struct InlineCandidate {

422 CallBase *CallInstr; // call site under consideration

423 const FunctionSamples *CalleeSamples; // profile for the callee at this site; may be null (CandidateComparer checks for that)

424 // Sample count attributed to this call site. CandidateComparer orders

425 // candidates by this value first, and shouldInlineCandidate compares it

426 // against the profile summary's hot-count threshold.

427

428 uint64_t CallsiteCount;

429 // Distribution factor for the call site. getInlineCandidate takes it from the

430 // pseudo-probe factor when a probe is present and uses 1.0 otherwise.

431 float CallsiteDistribution;

432};

433

434

435struct CandidateComparer {

436 bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {

437 if (LHS.CallsiteCount != RHS.CallsiteCount)

438 return LHS.CallsiteCount < RHS.CallsiteCount;

439

440 const FunctionSamples *LCS = LHS.CalleeSamples;

441 const FunctionSamples *RCS = RHS.CalleeSamples;

442

443

444 if (!LCS || !RCS)

445 return LCS;

446

447

450

451

453 }

454};

455

456using CandidateQueue =

458 CandidateComparer>;

459

460

461

462

463

464

466public:

467 SampleProfileLoader(

469 IntrusiveRefCntPtrvfs::FileSystem FS,

470 std::function<AssumptionCache &(Function &)> GetAssumptionCache,

471 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,

472 std::function<const TargetLibraryInfo &(Function &)> GetTLI,

473 LazyCallGraph &CG, bool DisableSampleProfileInlining,

474 bool UseFlattenedProfile)

475 : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),

477 GetAC(std::move(GetAssumptionCache)),

478 GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),

479 CG(CG), LTOPhase(LTOPhase),

484 DisableSampleProfileInlining(DisableSampleProfileInlining),

485 UseFlattenedProfile(UseFlattenedProfile) {}

486

489 ProfileSummaryInfo *_PSI);

490

491protected:

493 bool emitAnnotations(Function &F);

494 ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;

495 const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;

496 const FunctionSamples *

497 findFunctionSamples(const Instruction &I) const override;

498 std::vector<const FunctionSamples *>

499 findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;

500 void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,

501 DenseSetGlobalValue::GUID &InlinedGUIDs,

502 uint64_t Threshold);

503

504 bool tryPromoteAndInlineCandidate(

505 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,

507

508 bool inlineHotFunctions(Function &F,

509 DenseSetGlobalValue::GUID &InlinedGUIDs);

510 std::optional getExternalInlineAdvisorCost(CallBase &CB);

511 bool getExternalInlineAdvisorShouldInline(CallBase &CB);

512 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);

513 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);

514 bool

515 tryInlineCandidate(InlineCandidate &Candidate,

517 bool

518 inlineHotFunctionsWithPriority(Function &F,

519 DenseSetGlobalValue::GUID &InlinedGUIDs);

520

521 bool shouldInlineColdCallee(CallBase &CallInst);

522 void emitOptimizationRemarksForInlineCandidates(

523 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,

524 bool Hot);

525 void promoteMergeNotInlinedContextSamples(

526 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,

527 const Function &F);

528 std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);

529 std::unique_ptr buildProfiledCallGraph(Module &M);

530 void generateMDProfMetadata(Function &F);

531 bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,

532 const SampleProfileMap &Profiles);

533 void removePseudoProbeInstsDiscriminator(Module &M);

534

535

536

537

538

539 HashKeyMap<std::unordered_map, FunctionId, Function *> SymbolMap;

540

541

542

543 HashKeyMap<std::unordered_map, FunctionId, FunctionId> FuncNameToProfNameMap;

544

545 std::function<AssumptionCache &(Function &)> GetAC;

546 std::function<TargetTransformInfo &(Function &)> GetTTI;

547 std::function<const TargetLibraryInfo &(Function &)> GetTLI;

548 LazyCallGraph &CG;

549

550

551 std::unique_ptr ContextTracker;

552

553

554

555

556

557

559 const std::string AnnotatedPassName;

560

561

562

563 std::shared_ptr PSL;

564

565

566

567

568 struct NotInlinedProfileInfo {

569 uint64_t entryCount; // running sum of head-sample estimates added by promoteMergeNotInlinedContextSamples for call sites that were not inlined

570 };

571 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;

572

573

574

575 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;

576

577

578

579 StringSet<> NamesInProfile;

580

581

582

583

584 llvm::DenseSet<uint64_t> GUIDsInProfile;

585

586

587

588

589

590

591 bool ProfAccForSymsInList;

592

593 bool DisableSampleProfileInlining;

594

595 bool UseFlattenedProfile;

596

597

598 std::unique_ptr ExternalInlineAdvisor;

599

600

601 std::unique_ptr MatchingManager;

602

603private:

604 const char *getAnnotatedRemarkPassName() const {

605 return AnnotatedPassName.c_str(); // points into the const std::string member, so it is valid for as long as this loader is alive

606 }

607};

608}

609

610namespace llvm {

611template <>

612inline bool SampleProfileInference::isExit(const BasicBlock *BB) {

614}

615

616template <>

617inline void SampleProfileInference::findUnlikelyJumps(

618 const std::vector<const BasicBlockT *> &BasicBlocks,

619 BlockEdgeMap &Successors, FlowFunction &Func) {

620 for (auto &Jump : Func.Jumps) {

621 const auto *BB = BasicBlocks[Jump.Source];

622 const auto *Succ = BasicBlocks[Jump.Target];

623 const Instruction *TI = BB->getTerminator();

624

625

626 const auto &Succs = Successors[BB];

627 if (Succs.size() == 2 && Succs.back() == Succ) {

629 Jump.IsUnlikely = true;

630 }

631 }

632 const Instruction *SuccTI = Succ->getTerminator();

633

636 Jump.IsUnlikely = true;

637 }

638 }

639 }

640}

641

642template <>

646 DT->recalculate(F);

647

649

651 LI->analyze(*DT);

652}

653}

654

657 return getProbeWeight(Inst);

658

660 if (!DLoc)

661 return std::error_code();

662

663

664

665

667 return std::error_code();

668

669

670

671

672

673

674

677 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))

678 return 0;

679

680 return getInstWeightImpl(Inst);

681}

682

683

684

685

686

687

688

689

690

691

692

693

694

695const FunctionSamples *

696SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {

697 const DILocation *DIL = Inst.getDebugLoc();

698 if (!DIL) {

699 return nullptr;

700 }

701

702 StringRef CalleeName;

704 CalleeName = Callee->getName();

705

707 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);

708

709 const FunctionSamples *FS = findFunctionSamples(Inst);

710 if (FS == nullptr)

711 return nullptr;

712

714 CalleeName, Reader->getRemapper(),

715 &FuncNameToProfNameMap);

716}

717

718

719

720

721std::vector<const FunctionSamples *>

722SampleProfileLoader::findIndirectCallFunctionSamples(

723 const Instruction &Inst, uint64_t &Sum) const {

724 const DILocation *DIL = Inst.getDebugLoc();

725 std::vector<const FunctionSamples *> R;

726

727 if (!DIL) {

728 return R;

729 }

730

731 auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {

732 assert(L && R && "Expect non-null FunctionSamples");

733 if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())

734 return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();

735 return L->getGUID() < R->getGUID();

736 };

737

739 auto CalleeSamples =

740 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);

741 if (CalleeSamples.empty())

742 return R;

743

744

745

746 Sum = 0;

747 for (const auto *const FS : CalleeSamples) {

748 Sum += FS->getHeadSamplesEstimate();

749 R.push_back(FS);

750 }

752 return R;

753 }

754

755 const FunctionSamples *FS = findFunctionSamples(Inst);

756 if (FS == nullptr)

757 return R;

758

760 Sum = 0;

761 if (auto T = FS->findCallTargetMapAt(CallSite))

762 for (const auto &T_C : *T)

763 Sum += T_C.second;

765 if (M->empty())

766 return R;

767 for (const auto &NameFS : *M) {

768 Sum += NameFS.second.getHeadSamplesEstimate();

769 R.push_back(&NameFS.second);

770 }

772 }

773 return R;

774}

775

776const FunctionSamples *

777SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {

779 std::optional Probe = extractProbe(Inst);

780 if (!Probe)

781 return nullptr;

782 }

783

784 const DILocation *DIL = Inst.getDebugLoc();

785 if (!DIL)

786 return Samples;

787

788 auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);

789 if (it.second) {

791 it.first->second = ContextTracker->getContextSamplesFor(DIL);

792 else

793 it.first->second = Samples->findFunctionSamples(

794 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);

795 }

796 return it.first->second;

797}

798

799

800

801

802

803

804

805

810

811

812 if (ValueData.empty())

813 return true;

814

815 unsigned NumPromoted = 0;

816 for (const auto &V : ValueData) {

818 continue;

819

820

821

822

824 return false;

825 NumPromoted++;

826

828 return false;

829 }

830 return true;

831}

832

833

834

835

836

837

838static void

842

843

844

845

846

848 return;

849

853

855 if (Sum == 0) {

858 "If sum is 0, assume only one element in CallTargets "

859 "with count being NOMORE_ICP_MAGICNUM");

860

861 for (const auto &V : ValueData)

862 ValueCountMap[V.Value] = V.Count;

863 auto Pair =

865

866

867 if (!Pair.second) {

868 OldSum -= Pair.first->second;

870 }

871 Sum = OldSum;

872 } else {

873

874

875 for (const auto &V : ValueData) {

877 ValueCountMap[V.Value] = V.Count;

878 }

879

880 for (const auto &Data : CallTargets) {

882 if (Pair.second)

883 continue;

884

885

886

887 assert(Sum >= Data.Count && "Sum should never be less than Data.Count");

888 Sum -= Data.Count;

889 }

890 }

891

893 for (const auto &ValueCount : ValueCountMap) {

895 InstrProfValueData{ValueCount.first, ValueCount.second});

896 }

897

899 [](const InstrProfValueData &L, const InstrProfValueData &R) {

900 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);

901 });

902

906 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);

907}

908

909

910

911

912

913

914

915

916

917

918

919bool SampleProfileLoader::tryPromoteAndInlineCandidate(

920 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,

922

923 if (DisableSampleProfileInlining)

924 return false;

925

926

927

929 return false;

930 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();

931 auto R = SymbolMap.find(CalleeFunctionName);

932 if (R == SymbolMap.end() || !R->second) // an unknown or null callee cannot be promoted; R->second is dereferenced below

933 return false;

934

935 auto &CI = *Candidate.CallInstr;

937 return false;

938

939 const char *Reason = "Callee function not available";

940

941

942

943

944

945

946 if (R->second->isDeclaration() && R->second->getSubprogram() &&

947 R->second->hasFnAttribute("use-sample-profile") &&

949

950

952 Function::getGUIDAssumingExternalLinkage(R->second->getName()),

955

957 CI, R->second, Candidate.CallsiteCount, Sum, false, ORE);

958 if (DI) {

959 Sum -= Candidate.CallsiteCount;

960

961

962

963

964

965

966

967

968

969

970

971

972 Candidate.CallInstr = DI;

974 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);

975 if (!Inlined) {

976

977

979 *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);

980 }

982 }

983 }

984 } else {

985 LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "

987 Candidate.CallInstr->getName())<< " because "

988 << Reason << "\n");

989 }

990 return false;

991}

992

993bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {

995 return false;

996

998 if (Callee == nullptr)

999 return false;

1000

1002 GetAC, GetTLI);

1003

1004 if (Cost.isNever())

1005 return false;

1006

1007 if (Cost.isAlways())

1008 return true;

1009

1011}

1012

1013void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(

1014 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,

1015 bool Hot) {

1016 for (auto *I : Candidates) {

1017 Function *CalledFunction = I->getCalledFunction();

1018 if (CalledFunction) {

1019 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),

1020 "InlineAttempt", I->getDebugLoc(),

1021 I->getParent())

1022 << "previous inlining reattempted for "

1023 << (Hot ? "hotness: '" : "size: '")

1024 << ore::NV("Callee", CalledFunction) << "' into '"

1025 << ore::NV("Caller", &F) << "'");

1026 }

1027 }

1028}

1029

1030void SampleProfileLoader::findExternalInlineCandidate(

1031 CallBase *CB, const FunctionSamples *Samples,

1032 DenseSetGlobalValue::GUID &InlinedGUIDs, uint64_t Threshold) {

1033

1034

1035

1036 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {

1037

1038

1039 if (!Samples) {

1040 InlinedGUIDs.insert(Function::getGUIDAssumingExternalLinkage(

1042 return;

1043 }

1044

1045 Threshold = 0;

1046 }

1047

1048

1049

1050

1051

1052

1053 if (!Samples)

1054 return;

1055

1056

1057

1059

1061 Threshold = 0;

1063 return;

1064 }

1065

1066 ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);

1067 std::queue<ContextTrieNode *> CalleeList;

1068 CalleeList.push(Caller);

1069 while (!CalleeList.empty()) {

1070 ContextTrieNode *Node = CalleeList.front();

1071 CalleeList.pop();

1072 FunctionSamples *CalleeSample = Node->getFunctionSamples();

1073

1074

1075

1076 if (!CalleeSample)

1077 continue;

1078

1079

1080 bool PreInline =

1084 continue;

1085

1087

1088 if (!Func || Func->isDeclaration())

1090

1091

1092

1093 for (const auto &BS : CalleeSample->getBodySamples())

1094 for (const auto &TS : BS.second.getCallTargets())

1095 if (TS.second > Threshold) {

1096 const Function *Callee = SymbolMap.lookup(TS.first);

1097 if (!Callee || Callee->isDeclaration())

1098 InlinedGUIDs.insert(TS.first.getHashCode());

1099 }

1100

1101

1102

1103

1104

1105 for (auto &Child : Node->getAllChildContext()) {

1106 ContextTrieNode *CalleeNode = &Child.second;

1107 CalleeList.push(CalleeNode);

1108 }

1109 }

1110}

1111

1112

1113

1114

1115

1116

1117

1118

1119

1120

1121

1122

1123

1124

1125

1126

1127

1128

1129

1130

1131

1132

1133

1134bool SampleProfileLoader::inlineHotFunctions(

1135 Function &F, DenseSetGlobalValue::GUID &InlinedGUIDs) {

1136

1137

1138 assert((!ProfAccForSymsInList ||

1140 F.hasFnAttribute("profile-sample-accurate"))) &&

1141 "ProfAccForSymsInList should be false when profile-sample-accurate "

1142 "is enabled");

1143

1144 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;

1146 bool LocalChanged = true;

1147 while (LocalChanged) {

1148 LocalChanged = false;

1150 for (auto &BB : F) {

1151 bool Hot = false;

1154 for (auto &I : BB) {

1155 const FunctionSamples *FS = nullptr;

1158 if ((FS = findCalleeFunctionSamples(*CB))) {

1160 "GUIDToFuncNameMap has to be populated");

1162 if (FS->getHeadSamplesEstimate() > 0 ||

1164 LocalNotInlinedCallSites.insert({CB, FS});

1165 if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))

1166 Hot = true;

1167 else if (shouldInlineColdCallee(*CB))

1169 } else if (getExternalInlineAdvisorShouldInline(*CB)) {

1171 }

1172 }

1173 }

1174 }

1175 if (Hot || ExternalInlineAdvisor) {

1177 emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);

1178 } else {

1180 emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);

1181 }

1182 }

1183 for (CallBase *I : CIS) {

1184 Function *CalledFunction = I->getCalledFunction();

1185 InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),

1186 0 ,

1187 1.0 };

1188

1189 if (CalledFunction == &F)

1190 continue;

1191 if (I->isIndirectCall()) {

1192 uint64_t Sum;

1193 for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {

1194 uint64_t SumOrigin = Sum;

1195 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {

1196 findExternalInlineCandidate(I, FS, InlinedGUIDs,

1197 PSI->getOrCompHotCountThreshold());

1198 continue;

1199 }

1200 if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) // only hot indirect-call targets are promoted and inlined; skip cold ones

1201 continue;

1202

1203 Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};

1204 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {

1205 LocalNotInlinedCallSites.erase(I);

1206 LocalChanged = true;

1207 }

1208 }

1209 } else if (CalledFunction && CalledFunction->getSubprogram() &&

1211 if (tryInlineCandidate(Candidate)) {

1212 LocalNotInlinedCallSites.erase(I);

1213 LocalChanged = true;

1214 }

1215 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {

1216 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),

1217 InlinedGUIDs,

1218 PSI->getOrCompHotCountThreshold());

1219 }

1220 }

1221 Changed |= LocalChanged;

1222 }

1223

1224

1225

1227 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);

1229}

1230

1231bool SampleProfileLoader::tryInlineCandidate(

1233

1234

1235 if (DisableSampleProfileInlining)

1236 return false;

1237

1238 CallBase &CB = *Candidate.CallInstr;

1240 assert(CalledFunction && "Expect a callee with definition");

1243

1244 InlineCost Cost = shouldInlineCandidate(Candidate);

1245 if (Cost.isNever()) {

1246 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),

1247 "InlineFail", DLoc, BB)

1248 << "incompatible inlining");

1249 return false;

1250 }

1251

1253 return false;

1254

1255 InlineFunctionInfo IFI(GetAC);

1256 IFI.UpdateProfile = false;

1258 true);

1259 if (!IR.isSuccess()) // inlining failed: report failure before the remark, context marking and NumCSInlined update below

1260 return false;

1261

1262

1264 Cost, true, getAnnotatedRemarkPassName());

1265

1266

1267 if (InlinedCallSites) {

1268 InlinedCallSites->clear();

1270 }

1271

1273 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);

1274 ++NumCSInlined;

1275

1276

1277

1278

1279

1280

1281

1282

1283 if (Candidate.CallsiteDistribution < 1) {

1284 for (auto &I : IFI.InlinedCallSites) {

1285 if (std::optional Probe = extractProbe(*I))

1287 Candidate.CallsiteDistribution);

1288 }

1289 NumDuplicatedInlinesite++;

1290 }

1291

1292 return true;

1293}

1294

1295bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,

1296 CallBase *CB) {

1297 assert(CB && "Expect non-null call instruction");

1298

1300 return false;

1301

1302

1303 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);

1304

1305

1306 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))

1307 return false;

1308

1309 float Factor = 1.0;

1310 if (std::optional Probe = extractProbe(*CB))

1311 Factor = Probe->Factor;

1312

1313 uint64_t CallsiteCount =

1315 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};

1316 return true;

1317}

1318

1319std::optional

1320SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {

1321 std::unique_ptr Advice = nullptr;

1322 if (ExternalInlineAdvisor) {

1323 Advice = ExternalInlineAdvisor->getAdvice(CB);

1324 if (Advice) {

1325 if (!Advice->isInliningRecommended()) {

1326 Advice->recordUnattemptedInlining();

1328 }

1329 Advice->recordInlining();

1331 }

1332 }

1333

1334 return {};

1335}

1336

1337bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {

1338 std::optional Cost = getExternalInlineAdvisorCost(CB);

1339 return Cost ? !!*Cost : false;

1340}

1341

1342InlineCost

1343SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {

1344 if (std::optional ReplayCost =

1345 getExternalInlineAdvisorCost(*Candidate.CallInstr))

1346 return *ReplayCost;

1347

1348

1351 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())

1355 }

1356

1358 assert(Callee && "Expect a definition for inline candidate of direct call");

1359

1361

1364

1365

1366

1367

1368

1369

1370 InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,

1371 GetTTI(*Callee), GetAC, GetTLI);

1372

1373

1374 if (Cost.isNever() || Cost.isAlways())

1375 return Cost;

1376

1377

1378

1379

1380

1381

1382

1383

1384

1385

1387

1388

1389

1394 }

1395

1396

1397

1398

1401 }

1402

1403

1404

1406}

1407

1408bool SampleProfileLoader::inlineHotFunctionsWithPriority(

1409 Function &F, DenseSetGlobalValue::GUID &InlinedGUIDs) {

1410

1411

1412 assert((!ProfAccForSymsInList ||

1414 F.hasFnAttribute("profile-sample-accurate"))) &&

1415 "ProfAccForSymsInList should be false when profile-sample-accurate "

1416 "is enabled");

1417

1418

1419

1420 CandidateQueue CQueue;

1421 InlineCandidate NewCandidate;

1422 for (auto &BB : F) {

1423 for (auto &I : BB) {

1425 if (!CB)

1426 continue;

1427 if (getInlineCandidate(&NewCandidate, CB))

1428 CQueue.push(NewCandidate);

1429 }

1430 }

1431

1432

1433

1434

1435

1437 "Max inline size limit should not be smaller than min inline size "

1438 "limit.");

1442 if (ExternalInlineAdvisor)

1443 SizeLimit = std::numeric_limits::max();

1444

1445 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;

1446

1447

1449 while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {

1450 InlineCandidate Candidate = CQueue.top();

1451 CQueue.pop();

1452 CallBase *I = Candidate.CallInstr;

1453 Function *CalledFunction = I->getCalledFunction();

1454

1455 if (CalledFunction == &F)

1456 continue;

1457 if (I->isIndirectCall()) {

1458 uint64_t Sum = 0;

1459 auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);

1460 uint64_t SumOrigin = Sum;

1461 Sum *= Candidate.CallsiteDistribution;

1462 unsigned ICPCount = 0;

1463 for (const auto *FS : CalleeSamples) {

1464

1465 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {

1466 findExternalInlineCandidate(I, FS, InlinedGUIDs,

1467 PSI->getOrCompHotCountThreshold());

1468 continue;

1469 }

1470 uint64_t EntryCountDistributed =

1471 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;

1472

1473

1474

1475

1478 break;

1479

1480

1481

1482

1483

1484

1485

1486 if (!PSI->isHotCount(EntryCountDistributed))

1487 break;

1489

1490

1491 Candidate = {I, FS, EntryCountDistributed,

1492 Candidate.CallsiteDistribution};

1493 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,

1494 &InlinedCallSites)) {

1495 for (auto *CB : InlinedCallSites) {

1496 if (getInlineCandidate(&NewCandidate, CB))

1497 CQueue.emplace(NewCandidate);

1498 }

1499 ICPCount++;

1501 } else if (!ContextTracker) {

1502 LocalNotInlinedCallSites.insert({I, FS});

1503 }

1504 }

1505 } else if (CalledFunction && CalledFunction->getSubprogram() &&

1508 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {

1509 for (auto *CB : InlinedCallSites) {

1510 if (getInlineCandidate(&NewCandidate, CB))

1511 CQueue.emplace(NewCandidate);

1512 }

1514 } else if (!ContextTracker) {

1515 LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});

1516 }

1517 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {

1518 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),

1519 InlinedGUIDs,

1520 PSI->getOrCompHotCountThreshold());

1521 }

1522 }

1523

1524 if (!CQueue.empty()) {

1526 ++NumCSInlinedHitMaxLimit;

1528 ++NumCSInlinedHitMinLimit;

1529 else

1530 ++NumCSInlinedHitGrowthLimit;

1531 }

1532

1533

1534

1536 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);

1538}

1539

1540void SampleProfileLoader::promoteMergeNotInlinedContextSamples(

1541 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,

1542 const Function &F) {

1543

1544 for (const auto &Pair : NonInlinedCallSites) {

1545 CallBase *I = Pair.first;

1547 if (!Callee || Callee->isDeclaration())

1548 continue;

1549

1550 ORE->emit(

1551 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",

1552 I->getDebugLoc(), I->getParent())

1553 << "previous inlining not repeated: '" << ore::NV("Callee", Callee)

1554 << "' into '" << ore::NV("Caller", &F) << "'");

1555

1556 ++NumCSNotInlined;

1557 const FunctionSamples *FS = Pair.second;

1558 if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {

1559 continue;

1560 }

1561

1562

1564 continue;

1565

1567

1568

1569

1570

1571

1572 if (FS->getHeadSamples() == 0) {

1573

1574

1575 const_cast<FunctionSamples *>(FS)->addHeadSamples(

1576 FS->getHeadSamplesEstimate());

1577

1578

1579

1580

1581 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);

1582

1583

1584 if (!OutlineFS)

1585 OutlineFS = &OutlineFunctionSamples[

1587 OutlineFS->merge(*FS, 1);

1588

1590 }

1591 } else {

1592 auto pair =

1593 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});

1594 pair.first->second.entryCount += FS->getHeadSamplesEstimate();

1595 }

1596 }

1597}

1598

1599

1604 R.emplace_back(

1605 InstrProfValueData{I.first.getHashCode(), I.second});

1606 }

1607 return R;

1608}

1609

1610

1611

1612void SampleProfileLoader::generateMDProfMetadata(Function &F) {

1613

1614

1615 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");

1616 LLVMContext &Ctx = F.getContext();

1617 MDBuilder MDB(Ctx);

1618 for (auto &BI : F) {

1620

1621 if (BlockWeights[BB]) {

1622 for (auto &I : *BB) {

1624 continue;

1626 const DebugLoc &DLoc = I.getDebugLoc();

1627 if (!DLoc)

1628 continue;

1629 const DILocation *DIL = DLoc;

1630 const FunctionSamples *FS = findFunctionSamples(I);

1631 if (!FS)

1632 continue;

1634 ErrorOrSampleRecord::CallTargetMap T =

1635 FS->findCallTargetMapAt(CallSite);

1636 if (!T || T.get().empty()) // skip: lookup failed or no call targets recorded

1637 continue;

1639

1640

1641

1642 if (std::optional Probe = extractProbe(I)) {

1643 if (Probe->Factor < 1)

1645 }

1646 }

1649 uint64_t Sum = 0;

1650 for (const auto &C : T.get())

1651 Sum += C.second;

1652

1653

1654

1657 FS->findFunctionSamplesMapAt(CallSite)) {

1658 for (const auto &NameFS : *M)

1659 Sum += NameFS.second.getHeadSamplesEstimate();

1660 }

1661 }

1662 if (Sum)

1665 I.setMetadata(LLVMContext::MD_prof, nullptr);

1668 I, ArrayRef<uint32_t>{static_cast<uint32_t>(BlockWeights[BB])},

1669 false);

1670 }

1671 }

1673

1674

1675 for (auto &I : *BB) {

1678 I.setMetadata(LLVMContext::MD_prof, nullptr);

1679 } else {

1681 false);

1682 }

1683 }

1684 }

1685 }

1686

1689 continue;

1692 continue;

1693

1695 LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "

1696 << ((BranchLoc) ? Twine(BranchLoc.getLine())

1697 : Twine(""))

1698 << ".\n");

1699 SmallVector<uint32_t, 4> Weights;

1700 uint32_t MaxWeight = 0;

1702

1703

1704

1705 DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;

1706 std::vector<uint64_t> EdgeIndex;

1711 EdgeIndex[I] = EdgeMultiplicity[Succ];

1712 EdgeMultiplicity[Succ]++;

1713 }

1714 }

1717 Edge E = std::make_pair(BB, Succ);

1718 uint64_t Weight = EdgeWeights[E];

1720

1721

1722

1723 if (Weight > std::numeric_limits<uint32_t>::max()) {

1724 LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");

1725 Weight = std::numeric_limits<uint32_t>::max();

1726 }

1728

1729

1730 Weights.push_back(static_cast<uint32_t>(

1731 Weight == std::numeric_limits<uint32_t>::max() ? Weight

1732 : Weight + 1));

1733 } else {

1734

1735

1736 uint64_t W = Weight / EdgeMultiplicity[Succ];

1737

1738 if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])

1739 W++;

1740 Weights.push_back(static_cast<uint32_t>(W));

1741 }

1742 if (Weight != 0) {

1743 if (Weight > MaxWeight) {

1744 MaxWeight = Weight;

1745 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();

1746 }

1747 }

1748 }

1749

1751

1752 uint64_t TempWeight;

1753

1754

1755

1756

1757

1758

1759

1760

1761 if (MaxWeight > 0 &&

1763 LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");

1765 ORE->emit([&]() {

1766 return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)

1767 << "most popular destination for conditional branches at "

1768 << ore::NV("CondBranchesLoc", BranchLoc);

1769 });

1770 } else {

1772 TI->setMetadata(LLVMContext::MD_prof, nullptr);

1773 LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");

1774 } else {

1775 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");

1776 }

1777 }

1778 }

1779}

1780

1781

1782

1783

1784

1785

1786

1787bool SampleProfileLoader::emitAnnotations(Function &F) {

1789

1792 if (!ProbeManager->getDesc(F))

1793 dbgs() << "Probe descriptor missing for Function " << F.getName()

1794 << "\n";

1795 });

1796

1797 if (ProbeManager->profileIsValid(F, *Samples)) {

1798 ++NumMatchedProfile;

1799 } else {

1800 ++NumMismatchedProfile;

1802 dbgs() << "Profile is invalid due to CFG mismatch for Function "

1803 << F.getName() << "\n");

1805 return false;

1806 }

1807 } else {

1808 if (getFunctionLoc(F) == 0)

1809 return false;

1810

1811 LLVM_DEBUG(dbgs() << "Line number for the first instruction in "

1812 << F.getName() << ": " << getFunctionLoc(F) << "\n");

1813 }

1814

1815 DenseSetGlobalValue::GUID InlinedGUIDs;

1817 Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);

1818 else

1819 Changed |= inlineHotFunctions(F, InlinedGUIDs);

1820

1821 Changed |= computeAndPropagateWeights(F, InlinedGUIDs);

1822

1824 generateMDProfMetadata(F);

1825

1826 emitCoverageRemarks(F);

1828}

1829

1830std::unique_ptr

1831SampleProfileLoader::buildProfiledCallGraph(Module &M) {

1832 std::unique_ptr ProfiledCG;

1834 ProfiledCG = std::make_unique(*ContextTracker);

1835 else

1836 ProfiledCG = std::make_unique(Reader->getProfiles());

1837

1838

1839

1840

1841 for (Function &F : M) {

1843 continue;

1844 ProfiledCG->addProfiledFunction(

1846 }

1847

1848 return ProfiledCG;

1849}

1850

1851std::vector<Function *>

1852SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {

1853 std::vector<Function *> FunctionOrderList;

1854 FunctionOrderList.reserve(M.size());

1855

1857 errs() << "WARNING: -use-profiled-call-graph ignored, should be used "

1858 "together with -sample-profile-top-down-load.\n";

1859

1862

1863

1864

1865

1866

1868 }

1869

1870 for (Function &F : M)

1872 FunctionOrderList.push_back(&F);

1873 return FunctionOrderList;

1874 }

1875

1878

1879

1880

1881

1882

1883

1884

1885

1886

1887

1888

1889

1890

1891

1892

1893

1894

1895

1896

1897

1898

1899

1900

1901

1902

1903

1904

1905

1906

1907

1908

1909

1910

1911

1912

1913

1914

1915

1916

1917

1918

1919

1920

1921

1922

1923

1924

1925

1926 std::unique_ptr ProfiledCG = buildProfiledCallGraph(M);

1927 scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());

1928 while (!CGI.isAtEnd()) {

1929 auto Range = *CGI;

1931

1932 scc_member_iterator<ProfiledCallGraph *> SI(*CGI);

1934 }

1935 for (auto *Node : Range) {

1938 FunctionOrderList.push_back(F);

1939 }

1940 ++CGI;

1941 }

1942 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());

1943 } else

1945

1947 dbgs() << "Function processing order:\n";

1948 for (auto F : FunctionOrderList) {

1949 dbgs() << F->getName() << "\n";

1950 }

1951 });

1952

1953 return FunctionOrderList;

1954}

1955

1956bool SampleProfileLoader::doInitialization(Module &M,

1958 auto &Ctx = M.getContext();

1959

1961 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);

1962 if (std::error_code EC = ReaderOrErr.getError()) {

1963 std::string Msg = "Could not open profile: " + EC.message();

1964 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));

1965 return false;

1966 }

1967 Reader = std::move(ReaderOrErr.get());

1968 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);

1969

1970

1971 Reader->setModule(&M);

1972 if (std::error_code EC = Reader->read()) {

1973 std::string Msg = "profile reading failed: " + EC.message();

1974 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));

1975 return false;

1976 }

1977

1978 PSL = Reader->getProfileSymbolList();

1979

1982

1983 if (UseFlattenedProfile)

1985 Reader->profileIsCS());

1986

1987

1988 ProfAccForSymsInList =

1990 if (ProfAccForSymsInList) {

1991 NamesInProfile.clear();

1992 GUIDsInProfile.clear();

1993 if (auto NameTable = Reader->getNameTable()) {

1995 for (auto Name : *NameTable)

1996 GUIDsInProfile.insert(Name.getHashCode());

1997 } else {

1998 for (auto Name : *NameTable)

1999 NamesInProfile.insert(Name.stringRef());

2000 }

2001 }

2002 CoverageTracker.setProfAccForSymsInList(true);

2003 }

2004

2007 M, *FAM, Ctx, nullptr,

2012 false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});

2013 }

2014

2015

2016 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||

2017 Reader->profileIsProbeBased()) {

2024

2029

2032

2033 if (Reader->profileIsPreInlined()) {

2036 }

2037

2038

2039

2040

2041

2042

2043 if (Reader->profileIsProbeBased()) {

2048 }

2049

2050 if (!Reader->profileIsCS()) {

2051

2052

2053

2054

2059 }

2060 }

2061

2062 if (Reader->profileIsCS()) {

2063

2064 ContextTracker = std::make_unique(

2065 Reader->getProfiles(), &GUIDToFuncNameMap);

2066 }

2067

2068

2069 if (Reader->profileIsProbeBased()) {

2070 ProbeManager = std::make_unique(M);

2071 if (!ProbeManager->moduleIsProbed(M)) {

2072 const char *Msg =

2073 "Pseudo-probe-based profile requires SampleProfileProbePass";

2074 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,

2076 return false;

2077 }

2078 }

2079

2082 MatchingManager = std::make_unique(

2083 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,

2084 FuncNameToProfNameMap);

2085 }

2086

2087 return true;

2088}

2089

2090

2091

2092

2093

2094

2095

2096

2097

2098

2099

2100bool SampleProfileLoader::rejectHighStalenessProfile(

2101 Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {

2103 "Only support for probe-based profile");

2104 uint64_t TotalHotFunc = 0;

2105 uint64_t NumMismatchedFunc = 0;

2106 for (const auto &I : Profiles) {

2107 const auto &FS = I.second;

2108 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());

2109 if (!FuncDesc)

2110 continue;

2111

2112

2114 FS.getTotalSamples()))

2115 continue;

2116

2117 TotalHotFunc++;

2118 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))

2119 NumMismatchedFunc++;

2120 }

2121

2122

2124 return false;

2125

2126

2127 if (NumMismatchedFunc * 100 >=

2129 auto &Ctx = M.getContext();

2130 const char *Msg =

2131 "The input profile significantly mismatches current source code. "

2132 "Please recollect profile to avoid performance regression.";

2133 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));

2134 return true;

2135 }

2136 return false;

2137}

2138

2139void SampleProfileLoader::removePseudoProbeInstsDiscriminator(Module &M) {

2140 for (auto &F : M) {

2141 std::vector<Instruction *> InstsToDel;

2142 for (auto &BB : F) {

2143 for (auto &I : BB) {

2145 InstsToDel.push_back(&I);

2147 if (const DILocation *DIL = I.getDebugLoc().get()) {

2148

2149 unsigned Discriminator = DIL->getDiscriminator();

2151 std::optional<uint32_t> DwarfDiscriminator =

2153 Discriminator);

2154 I.setDebugLoc(

2156 }

2157 }

2158 }

2159 }

2160 for (auto *I : InstsToDel)

2161 I->eraseFromParent();

2162 }

2163}

2164

2166 ProfileSummaryInfo *_PSI) {

2167 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);

2168

2169 PSI = _PSI;

2170 if (M.getProfileSummary( false) == nullptr) {

2171 M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),

2174 }

2175

2177 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))

2178 return false;

2179

2180 auto Remapper = Reader->getRemapper();

2181

2182 for (const auto &N_F : M.getValueSymbolTable()) {

2183 StringRef OrigName = N_F.getKey();

2185 if (F == nullptr || OrigName.empty())

2186 continue;

2187 SymbolMap[FunctionId(OrigName)] = F;

2189 if (OrigName != NewName && !NewName.empty()) {

2190 auto r = SymbolMap.emplace(FunctionId(NewName), F);

2191

2192

2193

2194

2195 if (!r.second)

2196 r.first->second = nullptr;

2197 OrigName = NewName;

2198 }

2199

2200 if (Remapper) {

2201 if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {

2202 if (*MapName != OrigName && !MapName->empty())

2203 SymbolMap.emplace(FunctionId(*MapName), F);

2204 }

2205 }

2206 }

2207

2208

2211 MatchingManager->runOnModule();

2212 MatchingManager->clearMatchingData();

2213 }

2214 assert(SymbolMap.count(FunctionId()) == 0 &&

2215 "No empty StringRef should be added in SymbolMap");

2217 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "

2218 "not enabled");

2219

2220 bool retval = false;

2221 for (auto *F : buildFunctionOrder(M, CG)) {

2222 assert(!F->isDeclaration()); // only function definitions are processed here

2223 clearFunctionData();

2225 }

2226

2227

2229 for (const std::pair<Function *, NotInlinedProfileInfo> &pair :

2230 notInlinedCallInfo)

2232

2235 removePseudoProbeInstsDiscriminator(M);

2237 M.eraseNamedMetadata(FuncInfo);

2238 }

2239

2240 return retval;

2241}

2242

2243bool SampleProfileLoader::runOnFunction(Function &F,

2245 LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");

2246 DILocation2SampleMap.clear();

2247

2248

2249

2250

2251 uint64_t initialEntryCount = -1;

2252

2255

2256

2257 initialEntryCount = 0;

2258

2259

2260 ProfAccForSymsInList = false;

2261 }

2262 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);

2263

2264

2265

2266

2267

2268 if (ProfAccForSymsInList) {

2269

2270 if (PSL->contains(F.getName()))

2271 initialEntryCount = 0;

2272

2273

2274

2275

2276

2277

2278

2279

2280

2281

2282

2285 GUIDsInProfile.count(

2286 Function::getGUIDAssumingExternalLinkage(CanonName))) ||

2288 initialEntryCount = -1;

2289 }

2290

2291

2292

2293 if (F.getEntryCount())

2296 .getManager();

2297 ORE = &FAM.getResult(F);

2298

2300 Samples = ContextTracker->getBaseSamplesFor(F);

2301 else {

2302 Samples = Reader->getSamplesFor(F);

2303

2304

2305 if (!Samples) {

2307 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));

2308 if (It != OutlineFunctionSamples.end()) {

2309 Samples = &It->second;

2310 } else if (auto Remapper = Reader->getRemapper()) {

2311 if (auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {

2312 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));

2313 if (It != OutlineFunctionSamples.end())

2314 Samples = &It->second;

2315 }

2316 }

2317 }

2318 }

2319

2320 if (Samples && !Samples->empty())

2321 return emitAnnotations(F);

2322 return false;

2323}

2325 std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,

2327 bool UseFlattenedProfile)

2328 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),

2329 LTOPhase(LTOPhase), FS(std::move(FS)),

2330 DisableSampleProfileInlining(DisableSampleProfileInlining),

2331 UseFlattenedProfile(UseFlattenedProfile) {}

2332

2337

2340 };

2343 };

2346 };

2347

2348 if (!FS)

2351

2352 SampleProfileLoader SampleLoader(

2355 : ProfileRemappingFileName,

2356 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,

2357 DisableSampleProfileInlining, UseFlattenedProfile);

2358 if (!SampleLoader.doInitialization(M, &FAM))

2360

2362 if (!SampleLoader.runOnModule(M, AM, PSI))

2364

2366}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

static bool runOnFunction(Function &F, bool PostInlining)

Provides ErrorOr smart pointer.

static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))

Module.h This file contains the declarations for the Module class.

This header defines various interfaces for pass management in LLVM.

static LVReader * CurrentReader

Implements a lazy call graph analysis and related passes for the new pass manager.

Legalize the Machine IR a function s Machine IR

Machine Check Debug Module

This file implements a map that provides insertion order iteration.

static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)

static const Function * getCalledFunction(const Value *V)

ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

FunctionAnalysisManager FAM

This file defines the PriorityQueue class.

This file contains the declarations for profiling metadata utility functions.

This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...

std::pair< BasicBlock *, BasicBlock * > Edge

This file provides the interface for context-sensitive profile tracker used by CSSPGO.

This file provides the interface for the sampled PGO profile loader base implementation.

This file provides the utility functions for the sampled PGO loader base implementation.

This file provides the interface for SampleProfileMatcher.

This file provides the interface for the pseudo probe implementation for AutoFDO.

static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))

static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))

static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))

static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)

static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))

static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)

Update indirect call target profile metadata for Inst.

Definition SampleProfile.cpp:839

static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))

static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)

static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))

static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)

Check whether the indirect call promotion history of Inst allows the promotion for Candidate.

Definition SampleProfile.cpp:806

static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)

Returns the sorted CallTargetMap M by count in descending order.

Definition SampleProfile.cpp:1601

#define CSINLINE_DEBUG

Definition SampleProfile.cpp:98

static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)

static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))

static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))

static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))

static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))

static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", ""), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", ":"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "."), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", ":. (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)

static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))

This file provides the interface for the sampled PGO loader pass.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This pass exposes codegen information to IR-level passes.

Defines the virtual file system interface vfs::FileSystem.

bool empty() const

Returns true if the analysis manager has an empty results cache.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

A function analysis which provides an AssumptionCache.

A cache of @llvm.assume calls within a function.

LLVM Basic Block Representation.

const Function * getParent() const

Return the enclosing method, or null if none.

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

static bool isPseudoProbeDiscriminator(unsigned Discriminator)

const DILocation * cloneWithDiscriminator(unsigned Discriminator) const

Returns a new DILocation with updated Discriminator.

LLVM_ABI unsigned getLine() const

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

Represents either an error or a value T.

Class to represent profile counts.

DISubprogram * getSubprogram() const

Get the attached subprogram.

static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)

Return a 64-bit global unique ID constructed from the name of a global symbol.

LLVM_ABI bool isDeclaration() const

Return true if the primary definition of this global value is outside of the current translation unit...

static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)

static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)

static InlineCost get(int Cost, int Threshold, int StaticBonus=0)

LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY

Return the number of successors that this instruction has.

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const

Retrieve total raw weight values of a branch.

LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY

Return the specified successor. This instruction must be a terminator.

LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...

LLVM_ABI void diagnose(const DiagnosticInfo &DI)

Report a message to the currently installed diagnostic handler.

An analysis pass which computes the call graph for a module.

A lazily constructed view of the call graph of a module.

VectorType::iterator erase(typename VectorType::iterator Iterator)

Remove the element given by Iterator.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

ValueT lookup(const KeyT &Key) const

A Module instance is used to store all the information related to an LLVM module.

PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...

An analysis pass based on the new PM to deliver ProfileSummaryInfo.

Analysis providing profile information.

LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)

If a summary is provided as argument, use that.

LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const

Returns true if count C is considered hot with regard to a given hot percentile cutoff value.

void computeDominanceAndLoopInfo(FunctionT &F)

PostDominatorTreePtrT PDT

LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

Definition SampleProfile.cpp:2333

LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)

Definition SampleProfile.cpp:2324

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

iterator insert(iterator I, T &&Elt)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

size_type count(StringRef Key) const

count - Return 1 if the element is in the map, 0 otherwise.

StringRef - Represent a constant reference to a string, i.e.

constexpr bool empty() const

empty - Check if the string is empty.

std::pair< typename Base::iterator, bool > insert(StringRef key)

Analysis pass providing the TargetTransformInfo.

Analysis pass providing the TargetLibraryInfo.

Provides information about what library functions are available for the current target.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

LLVM Value Representation.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

int getNumOccurrences() const

std::pair< iterator, bool > insert(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

const ParentTy * getParent() const

Representation of the samples collected for a function.

void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const

Recursively traverses all children, if the total sample count of the corresponding function is no les...

static LLVM_ABI bool ProfileIsCS

FunctionId getFunction() const

Return the function name.

static LLVM_ABI bool ProfileIsProbeBased

static StringRef getCanonicalFnName(const Function &F)

Return the canonical name for a function, taking into account suffix elision policy attributes.

void setContextSynthetic()

SampleContext & getContext() const

sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)

Merge the samples in Other into this one.

static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)

Returns a unique call site identifier for a given debug location of a call instruction.

uint64_t getHeadSamplesEstimate() const

Return an estimate of the sample count of the function entry basic block.

uint64_t getGUID() const

Return the GUID of the context's name.

const BodySampleMap & getBodySamples() const

Return all the samples collected in the body of the function.

static LLVM_ABI bool UseMD5

Whether the profile uses MD5 to represent string.

static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)

bool hasAttribute(ContextAttributeMask A)

Sample-based profile reader.

static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")

Create a sample profile reader appropriate to the file format.

std::unordered_map< FunctionId, uint64_t > CallTargetMap

static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)

Sort call targets in descending order of call frequency.

static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)

Prorate call targets by a distribution factor.

bool isAtEnd() const

Direct loop termination test which is more efficient than comparison with end().

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)

checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...

DiagnosticInfoOptimizationBase::Argument NV

LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)

NodeAddr< NodeBase * > Node

NodeAddr< FuncNode * > Func

static FunctionId getRepInFormat(StringRef Name)

Get the proper representation of a string according to whether the current Format uses MD5 to represe...

@ ContextDuplicatedIntoBase

std::map< FunctionId, FunctionSamples > FunctionSamplesMap

bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)

Return true if the given callsite is hot wrt to hot cutoff threshold.

friend class Instruction

Iterator for Instructions in a `BasicBlock.

LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()

Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.

This is an optimization pass for GlobalISel generic memory operations.

static bool isIndirectCall(const MachineInstr &MI)

cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))

cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))

LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)

Return true if the given indirect call site can be made to call Callee.

LLVM_ABI cl::opt< int > ProfileInlineLimitMin

bool succ_empty(const Instruction *I)

LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)

This function inlines the called function into the basic block of the caller.

static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))

static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

scc_iterator< T > scc_begin(const T &G)

Construct the begin iterator for a deduced graph type T.

static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))

static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))

InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy

Provide the FunctionAnalysisManager to Module proxy.

static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)

LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)

LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)

Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...

static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))

LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)

ThinOrFullLTOPhase

This enumerates the LLVM full LTO or ThinLTO optimization phases.

cl::opt< bool > SampleProfileUseProfi

LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)

Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...

void sort(IteratorTy Start, IteratorTy End)

llvm::cl::opt< bool > UseIterativeBFIInference

LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)

Emit ORE message based on cost (default heuristic).

FunctionAddr VTableAddr Count

Function::ProfileCount ProfileCount

static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)

LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)

Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)

Get an InlineCost object representing the cost of inlining this callsite.

FunctionAddr VTableAddr uintptr_t uintptr_t Data

LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)

Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...

cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))

LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold

LLVM_ABI InlineParams getInlineParams()

Generate the parameters to tune the inline cost analysis based only on the commandline options.

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

static bool skipProfileForFunction(const Function &F)

LLVM_ABI cl::opt< bool > SortProfiledSCC

static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)

cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))

static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))

static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite"))

LLVM_ABI cl::opt< int > ProfileInlineLimitMax

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

cl::opt< bool > EnableExtTspBlockPlacement

Definition SampleProfile.cpp:340

const uint64_t NOMORE_ICP_MAGICNUM

Magic number in the value profile metadata showing a target has been promoted for the instruction and...

LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit

static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))

AnalysisManager< Module > ModuleAnalysisManager

Convenience typedef for the Module analysis manager.

constexpr const char * PseudoProbeDescMetadataName

Implement std::hash so that hash_code can be used in STL containers.

@ LineColumnDiscriminator

A wrapper of binary function with basic blocks and jumps.

std::optional< bool > AllowRecursiveCall

Indicate whether we allow inlining for recursive call.

std::optional< bool > ComputeFullInlineCost

Compute inline cost even when the cost has exceeded the threshold.

static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)