LLVM: lib/Analysis/InlineCost.cpp Source File

1//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements inline cost analysis.
10//
11//===----------------------------------------------------------------------===//
12

33#include "llvm/Config/llvm-config.h"

49#include <climits>

50#include <limits>

51#include <optional>

52

53using namespace llvm;

54

55#define DEBUG_TYPE "inline-cost"

56

57STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

58

61 cl::desc("Default amount of inlining to perform"));

62

63

64

65

66

67

70 cl::desc("Ignore TTI attributes compatibility check between callee/caller "

71 "during inline cost calculation"));

72

75 cl::desc("Prints comments for instruction based on inline cost analysis"));

76

79 cl::desc("Control the amount of inlining to perform (default = 225)"));

80

83 cl::desc("Threshold for inlining functions with inline hint"));

84

88 cl::desc("Threshold for inlining cold callsites"));

89

92 cl::desc("Enable the cost-benefit analysis for the inliner"));

93

94

95

96

99 cl::desc("Multiplier to multiply cycle savings by during inlining"));

100

101

102

103

106 cl::desc("A multiplier on top of cycle savings to decide whether the "

107 "savings won't justify the cost"));

108

111 cl::desc("The maximum size of a callee that get's "

112 "inlined without sufficient cycle savings"));

113

114

115

116

119 cl::desc("Threshold for inlining functions with cold attribute"));

120

123 cl::desc("Threshold for hot callsites "));

124

127 cl::desc("Threshold for locally hot callsites "));

128

131 cl::desc("Maximum block frequency, expressed as a percentage of caller's "

132 "entry frequency, for a callsite to be cold in the absence of "

133 "profile information."));

134

137 cl::desc("Minimum block frequency, expressed as a multiple of caller's "

138 "entry frequency, for a callsite to be hot in the absence of "

139 "profile information."));

140

143 cl::desc("Cost of a single instruction when inlining"));

144

147 cl::desc("Cost of a single inline asm instruction when inlining"));

148

151 cl::desc("Cost of load/store instruction when inlining"));

152

155 cl::desc("Call penalty that is applied per callsite when inlining"));

156

159 cl::init(std::numeric_limits<size_t>::max()),

160 cl::desc("Do not inline functions with a stack size "

161 "that exceeds the specified limit"));

162

164 "recursive-inline-max-stacksize", cl::Hidden,

166 cl::desc("Do not inline recursive functions with a stack "

167 "size that exceeds the specified limit"));

168

171 cl::desc("Compute the full inline cost of a call site even when the cost "

172 "exceeds the threshold."));

173

176 cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "

177 "attributes."));

178

181 cl::desc("Disables evaluation of GetElementPtr with constant operands"));

182

185 cl::desc("Inline all viable calls, even if they exceed the inlining "

186 "threshold"));

187namespace llvm {

190 int AttrValue = 0;

192 return AttrValue;

193 }

194 return std::nullopt;

195}

196

200

204

207

208}

209

210}

211

212namespace {

213class InlineCostCallAnalyzer;

214

215

216

217struct InstructionCostDetail {

218 int CostBefore = 0;

219 int CostAfter = 0;

220 int ThresholdBefore = 0;

221 int ThresholdAfter = 0;

222

223 int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

224

225 int getCostDelta() const { return CostAfter - CostBefore; }

226

227 bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }

228};

229

231private:

232 InlineCostCallAnalyzer *const ICCA;

233

234public:

235 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}

236 void emitInstructionAnnot(const Instruction *I,

237 formatted_raw_ostream &OS) override;

238};

239

240

241

242

243

244

245

246

247
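// CallAnalyzer walks a candidate callee's IR via InstVisitor, simulating the
// effect of inlining it at CandidateCall: arguments are constant-propagated,
// SROA-able allocas and dead blocks are tracked, and the on*() hooks below let
// subclasses turn those events into an inline cost or a feature vector.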

248class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {

249 typedef InstVisitor<CallAnalyzer, bool> Base;

250 friend class InstVisitor<CallAnalyzer, bool>;

251

252protected:

253 virtual ~CallAnalyzer() = default;

254

255 const TargetTransformInfo &TTI;

256

257

258 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

259

260

261 function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

262

263

264 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;

265

266

267 ProfileSummaryInfo *PSI;

268

269

271

272

273 const DataLayout &DL;

274

275

276 OptimizationRemarkEmitter *ORE;

277

278

279

280

281 CallBase &CandidateCall;

282

283

284 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr;

285

286

287

288 virtual void onBlockStart(const BasicBlock *BB) {}

289

290

291 virtual void onBlockAnalyzed(const BasicBlock *BB) {}

292

293

294 virtual void onInstructionAnalysisStart(const Instruction *I) {}

295

296

297 virtual void onInstructionAnalysisFinish(const Instruction *I) {}

298

299

300

301

303

304

305

306

307 virtual bool shouldStop() { return false; }

308

309

310

311

312

314

315

316 virtual void onDisableSROA(AllocaInst *Arg) {}

317

318

319 virtual void onDisableLoadElimination() {}

320

321

322

323 virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

324

325

326 virtual void onCallPenalty() {}

327

328

329 virtual void onMemAccess(){};

330

331

332

333 virtual void onLoadEliminationOpportunity() {}

334

335

336

337 virtual void onCallArgumentSetup(const CallBase &Call) {}

338

339

340 virtual void onLoadRelativeIntrinsic() {}

341

342

343 virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {

344 }

345

346

347

348 virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

349

350

351

352 virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

353

354

355

356 virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,

357 bool DefaultDestUnreachable) {}

358

359

360

361 virtual void onMissedSimplification() {}

362

363

364 virtual void onInlineAsm(const InlineAsm &Arg) {}

365

366

367 virtual void onInitializeSROAArg(AllocaInst *Arg) {}

368

369

370 virtual void onAggregateSROAUse(AllocaInst *V) {}

371
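// Common handling for a use of an SROA-candidate pointer: if the use is
// benign (DoNotDisable), record it and keep the savings alive; otherwise
// disable SROA for the underlying alloca.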

372 bool handleSROA(Value *V, bool DoNotDisable) {

373

374 if (auto *SROAArg = getSROAArgForValueOrNull(V)) {

375 if (DoNotDisable) {

376 onAggregateSROAUse(SROAArg);

377 return true;

378 }

379 disableSROAForArg(SROAArg);

380 }

381 return false;

382 }

383

384 bool IsCallerRecursive = false;

385 bool IsRecursiveCall = false;

386 bool ExposesReturnsTwice = false;

387 bool HasDynamicAlloca = false;

388 bool ContainsNoDuplicateCall = false;

389 bool HasReturn = false;

390 bool HasIndirectBr = false;

391 bool HasUninlineableIntrinsic = false;

392 bool InitsVargArgs = false;

393

394

395 uint64_t AllocatedSize = 0;

396 unsigned NumInstructions = 0;

397 unsigned NumInlineAsmInstructions = 0;

398 unsigned NumVectorInstructions = 0;

399

400

401

402

403

404

405

406

407

408 DenseMap<Value *, Value *> SimplifiedValues;

409

410

411

412 DenseMap<Value *, AllocaInst *> SROAArgValues;

413

414

415 DenseSet<AllocaInst *> EnabledSROAAllocas;

416

417

418 DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

419

420

421 SmallPtrSet<BasicBlock *, 16> DeadBlocks;

422

423

424

425 DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

426

427

428

429

430 bool EnableLoadElimination = true;

431

432

433 bool AllowRecursiveCall = false;

434

435 SmallPtrSet<Value *, 16> LoadAddrSet;

436

437 AllocaInst *getSROAArgForValueOrNull(Value *V) const {

438 auto It = SROAArgValues.find(V);

439 if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)

440 return nullptr;

441 return It->second;

442 }

443

444

445

446 template <typename T> T *getDirectOrSimplifiedValue(Value *V) const {
447 if (auto *Direct = dyn_cast<T>(V))
448 return Direct;
449 return getSimplifiedValue<T>(V);
450 }

451

452

453 bool isAllocaDerivedArg(Value *V);

454 void disableSROAForArg(AllocaInst *SROAArg);

455 void disableSROA(Value *V);

456 void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);

457 void disableLoadElimination();

458 bool isGEPFree(GetElementPtrInst &GEP);

459 bool canFoldInboundsGEP(GetElementPtrInst &I);

460 bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);

461 bool simplifyCallSite(Function *F, CallBase &Call);

462 bool simplifyCmpInstForRecCall(CmpInst &Cmp);

464 bool simplifyIntrinsicCallIsConstant(CallBase &CB);

465 bool simplifyIntrinsicCallObjectSize(CallBase &CB);

466 ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

467 bool isLoweredToCall(Function *F, CallBase &Call);

468

469

470

471

472

473

474 bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

475

476

477

478 bool isKnownNonNullInCallee(Value *V);

479

480

481 bool allowSizeGrowth(CallBase &Call);

482

483

484 InlineResult analyzeBlock(BasicBlock *BB,

485 const SmallPtrSetImpl<const Value *> &EphValues);

486

487

488

491 void visit(Function *);

492 void visit(Function &);

493 void visit(BasicBlock *);

494 void visit(BasicBlock &);

495

496

497 bool visitInstruction(Instruction &I);

498

499

500 bool visitAlloca(AllocaInst &I);

501 bool visitPHI(PHINode &I);

502 bool visitGetElementPtr(GetElementPtrInst &I);

503 bool visitBitCast(BitCastInst &I);

504 bool visitPtrToInt(PtrToIntInst &I);

505 bool visitIntToPtr(IntToPtrInst &I);

506 bool visitCastInst(CastInst &I);

507 bool visitCmpInst(CmpInst &I);

508 bool visitSub(BinaryOperator &I);

509 bool visitBinaryOperator(BinaryOperator &I);

510 bool visitFNeg(UnaryOperator &I);

511 bool visitLoad(LoadInst &I);

512 bool visitStore(StoreInst &I);

513 bool visitExtractValue(ExtractValueInst &I);

514 bool visitInsertValue(InsertValueInst &I);

515 bool visitCallBase(CallBase &Call);

516 bool visitReturnInst(ReturnInst &RI);

517 bool visitBranchInst(BranchInst &BI);

518 bool visitSelectInst(SelectInst &SI);

519 bool visitSwitchInst(SwitchInst &SI);

520 bool visitIndirectBrInst(IndirectBrInst &IBI);

521 bool visitResumeInst(ResumeInst &RI);

522 bool visitCleanupReturnInst(CleanupReturnInst &RI);

523 bool visitCatchReturnInst(CatchReturnInst &RI);

524 bool visitUnreachableInst(UnreachableInst &I);

525

526public:

527 CallAnalyzer(

528 Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,

529 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,

530 function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,

531 function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,

532 ProfileSummaryInfo *PSI = nullptr,

533 OptimizationRemarkEmitter *ORE = nullptr,

534 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =

535 nullptr)

536 : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),

537 GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),

538 CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {}

539

540 InlineResult analyze();

541

542

543 Value *getSimplifiedValueUnchecked(Value *V) const {

544 return SimplifiedValues.lookup(V);

545 }

546

547

548

549 template <typename T> T *getSimplifiedValue(Value *V) const {

550 Value *SimpleV = SimplifiedValues.lookup(V);

551 if (!SimpleV)

552 return nullptr;

553

554

555

556 if constexpr (std::is_base_of_v<Constant, T>)
557 return dyn_cast<T>(SimpleV);
558

559

561 if (I->getFunction() != &F)

562 return nullptr;

564 if (Arg->getParent() != &F)

565 return nullptr;

567 return nullptr;

569 }

570

571

572

573 unsigned NumConstantArgs = 0;

574 unsigned NumConstantOffsetPtrArgs = 0;

575 unsigned NumAllocaArgs = 0;

576 unsigned NumConstantPtrCmps = 0;

577 unsigned NumConstantPtrDiffs = 0;

578 unsigned NumInstructionsSimplified = 0;

579

581};

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597
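// A switch with N case clusters that is not lowered to a jump table becomes
// roughly a balanced binary search over the clusters, so the expected number
// of comparisons is about 3/2 * N - 1.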

598int64_t getExpectedNumberOfCompare(int NumCaseCluster) {

599 return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;

600}

601

602

603

604class InlineCostCallAnalyzer final : public CallAnalyzer {

605 const bool ComputeFullInlineCost;

606 int LoadEliminationCost = 0;

607

608

609 int VectorBonus = 0;

610

611 int SingleBBBonus = 0;

612

613

614 const InlineParams &Params;

615

616

617

618

619 DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

620

621

622

623 int Threshold = 0;

624

625

626 int StaticBonusApplied = 0;

627

628

629 const bool BoostIndirectCalls;

630

631

632 const bool IgnoreThreshold;

633

634

635 const bool CostBenefitAnalysisEnabled;

636

637

638

639

640

641 int Cost = 0;

642

643

644

645

646 int CostAtBBStart = 0;

647

648

649

650 int ColdSize = 0;

651

652

653 bool DecidedByCostThreshold = false;

654

655

656 bool DecidedByCostBenefit = false;

657

658

659 std::optional CostBenefit;

660

661 bool SingleBB = true;

662

663 unsigned SROACostSavings = 0;

664 unsigned SROACostSavingsLost = 0;

665

666

667

668

669 DenseMap<AllocaInst *, int> SROAArgCosts;

670

671

673

674

675

676

677

678 void updateThreshold(CallBase &Call, Function &Callee);

679

680 std::optional getHotCallSiteThreshold(CallBase &Call,

681 BlockFrequencyInfo *CallerBFI);

682

683
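// Accumulate cost with saturation: both the increment and the running total
// are clamped to [INT_MIN, INT_MAX] so very large callees cannot overflow the
// signed cost.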

684 void addCost(int64_t Inc) {

685 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);

686 Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);

687 }

688

689 void onDisableSROA(AllocaInst *Arg) override {

690 auto CostIt = SROAArgCosts.find(Arg);

691 if (CostIt == SROAArgCosts.end())

692 return;

693 addCost(CostIt->second);

694 SROACostSavings -= CostIt->second;

695 SROACostSavingsLost += CostIt->second;

696 SROAArgCosts.erase(CostIt);

697 }

698

699 void onDisableLoadElimination() override {

700 addCost(LoadEliminationCost);

701 LoadEliminationCost = 0;

702 }

703

704 bool onCallBaseVisitStart(CallBase &Call) override {

705 if (std::optional AttrCallThresholdBonus =

707 Threshold += *AttrCallThresholdBonus;

708

709 if (std::optional AttrCallCost =

711 addCost(*AttrCallCost);

712

713

714 return false;

715 }

716 return true;

717 }

718

719 void onCallPenalty() override { addCost(CallPenalty); }

720

721 void onMemAccess() override { addCost(MemAccessCost); }

722

723 void onCallArgumentSetup(const CallBase &Call) override {

724

725

727 }

728 void onLoadRelativeIntrinsic() override {

729

731 }

732 void onLoweredCall(Function *F, CallBase &Call,

733 bool IsIndirectCall) override {

734

736

737

738

739

740

741

742 if (IsIndirectCall && BoostIndirectCalls) {

743 auto IndirectCallParams = Params;

744 IndirectCallParams.DefaultThreshold =

746

747

748 InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,

749 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,

750 false);

751 if (CA.analyze().isSuccess()) {

752

753

754 addCost(-std::max(0, CA.getThreshold() - CA.getCost()));

755 }

756 } else

757

760 }

761

762 void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,

763 bool DefaultDestUnreachable) override {

764

765

766

767 if (JumpTableSize) {

768

769

770 if (!DefaultDestUnreachable)

772

773 int64_t JTCost =

775 addCost(JTCost);

776 return;

777 }

778

779 if (NumCaseCluster <= 3) {

780

781

782

783 addCost((NumCaseCluster - DefaultDestUnreachable) * 2 * InstrCost);

784 return;

785 }

786

787 int64_t ExpectedNumberOfCompare =

788 getExpectedNumberOfCompare(NumCaseCluster);

789 int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;

790

791 addCost(SwitchCost);

792 }

793

794

795

796

797 void onInlineAsm(const InlineAsm &Arg) override {

799 return;

802 int SectionLevel = 0;

803 int InlineAsmInstrCount = 0;

804 for (StringRef AsmStr : AsmStrs) {

805

806 StringRef Trimmed = AsmStr.trim();

807 size_t hashPos = Trimmed.find('#');
808 if (hashPos != StringRef::npos)
809 Trimmed = Trimmed.substr(0, hashPos);

810

811 if (Trimmed.empty())

812 continue;

813

814

815

816

817 if (Trimmed.starts_with(".pushsection")) {

818 ++SectionLevel;

819 continue;

820 }

821 if (Trimmed.starts_with(".popsection")) {

822 --SectionLevel;

823 continue;

824 }

825

827 continue;

828 if (SectionLevel == 0)

829 ++InlineAsmInstrCount;

830 }

831 NumInlineAsmInstructions += InlineAsmInstrCount;

833 }

834

835 void onMissedSimplification() override { addCost(InstrCost); }

836

837 void onInitializeSROAArg(AllocaInst *Arg) override {

838 assert(Arg != nullptr &&

839 "Should not initialize SROA costs for null value.");

841 SROACostSavings += SROAArgCost;

842 SROAArgCosts[Arg] = SROAArgCost;

843 }

844

845 void onAggregateSROAUse(AllocaInst *SROAArg) override {

846 auto CostIt = SROAArgCosts.find(SROAArg);

847 assert(CostIt != SROAArgCosts.end() &&

848 "expected this argument to have a cost");

851 }

852

853 void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

854

855 void onBlockAnalyzed(const BasicBlock *BB) override {

856 if (CostBenefitAnalysisEnabled) {

857

858

859 assert(GetBFI && "GetBFI must be available");

860 BlockFrequencyInfo *BFI = &(GetBFI(F));

861 assert(BFI && "BFI must be available");

864 ColdSize += Cost - CostAtBBStart;

865 }

866

868

869

870

871

872 if (SingleBB && TI->getNumSuccessors() > 1) {

873

874 Threshold -= SingleBBBonus;

875 SingleBB = false;

876 }

877 }

878

879 void onInstructionAnalysisStart(const Instruction *I) override {

880

881

883 return;

884 auto &CostDetail = InstructionCostDetailMap[I];

885 CostDetail.CostBefore = Cost;

886 CostDetail.ThresholdBefore = Threshold;

887 }

888

889 void onInstructionAnalysisFinish(const Instruction *I) override {

890

891

893 return;

894 auto &CostDetail = InstructionCostDetailMap[I];

895 CostDetail.CostAfter = Cost;

896 CostDetail.ThresholdAfter = Threshold;

897 }

898

899 bool isCostBenefitAnalysisEnabled() {

900 if (!PSI || !PSI->hasProfileSummary())

901 return false;

902

903 if (!GetBFI)

904 return false;

905

907

909 return false;

910 } else {

911

912 if (!PSI->hasInstrumentationProfile())

913 return false;

914 }

915

917 if (!Caller->getEntryCount())

918 return false;

919

920 BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));

921 if (!CallerBFI)

922 return false;

923

924

925 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))

926 return false;

927

928

929 auto EntryCount = F.getEntryCount();

930 if (!EntryCount || !EntryCount->getCount())

931 return false;

932

933 BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));

934 if (!CalleeBFI)

935 return false;

936

937 return true;

938 }

939

940

941 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {

945 }

946

947

948 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {

952 }

953

954 void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) {

956 CandidateCall, "inline-cycle-savings-for-test")) {

957 CycleSavings = *AttrCycleSavings;

958 }

959

961 CandidateCall, "inline-runtime-cost-for-test")) {

962 Size = *AttrRuntimeCost;

963 }

964 }

965

966

967

968
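// Weigh the estimated cycle savings of inlining (instructions and branches
// that simplify away, scaled by profile counts and normalized by the callee's
// entry count) against the size growth (Cost minus ColdSize). Returns
// true/false when the decision is clear, std::nullopt to fall back to the
// threshold check.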

969 std::optional costBenefitAnalysis() {

970 if (!CostBenefitAnalysisEnabled)

971 return std::nullopt;

972

973

974

975

976

977 if (Threshold == 0)

978 return std::nullopt;

979

981 BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));

983

984

985

986

987

988

989

990

991

992

993

994 APInt CycleSavings(128, 0);

995

996 for (auto &BB : F) {

997 APInt CurrentSavings(128, 0);

998 for (auto &I : BB) {

1000

1001 if (BI->isConditional() &&

1002 getSimplifiedValue(BI->getCondition())) {

1004 }

1006 if (getSimplifiedValue(SI->getCondition()))

1009

1010 if (SimplifiedValues.count(V)) {

1012 }

1013 }

1014 }

1015

1018 CycleSavings += CurrentSavings;

1019 }

1020

1021

1022 auto EntryProfileCount = F.getEntryCount();

1023 assert(EntryProfileCount && EntryProfileCount->getCount());

1024 auto EntryCount = EntryProfileCount->getCount();

1025 CycleSavings += EntryCount / 2;

1026 CycleSavings = CycleSavings.udiv(EntryCount);

1027

1028

1029 auto *CallerBB = CandidateCall.getParent();

1030 BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));

1033

1034

1035

1036

1037 int Size = Cost - ColdSize;

1038

1039

1040

1042

1043 OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size);

1044 CostBenefit.emplace(APInt(128, Size), CycleSavings);

1045

1046

1047

1048

1049

1050

1051

1052

1053

1054

1055

1056

1057

1058

1059

1060

1061

1062

1063

1064

1065

1066

1067 APInt Threshold(128, PSI->getOrCompHotCountThreshold());

1068 Threshold *= Size;

1069

1070 APInt UpperBoundCycleSavings = CycleSavings;

1071 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();

1072 if (UpperBoundCycleSavings.uge(Threshold))

1073 return true;

1074

1075 APInt LowerBoundCycleSavings = CycleSavings;

1076 LowerBoundCycleSavings *=

1077 getInliningCostBenefitAnalysisProfitableMultiplier();

1078 if (LowerBoundCycleSavings.ult(Threshold))

1079 return false;

1080

1081

1082 return std::nullopt;

1083 }

1084

1085 InlineResult finalizeAnalysis() override {

1086

1087

1088

1089

1090

1092 if (Caller->hasMinSize()) {

1093 DominatorTree DT(F);

1094 LoopInfo LI(DT);

1095 int NumLoops = 0;

1096 for (Loop *L : LI) {

1097

1098 if (DeadBlocks.count(L->getHeader()))

1099 continue;

1100 NumLoops++;

1101 }

1103 }

1104

1105

1106

1107

1108 if (NumVectorInstructions <= NumInstructions / 10)

1109 Threshold -= VectorBonus;

1110 else if (NumVectorInstructions <= NumInstructions / 2)

1111 Threshold -= VectorBonus / 2;

1112

1113 if (std::optional AttrCost =

1115 Cost = *AttrCost;

1116

1118 CandidateCall,

1120 Cost *= *AttrCostMult;

1121

1122 if (std::optional AttrThreshold =

1124 Threshold = *AttrThreshold;

1125

1126 if (auto Result = costBenefitAnalysis()) {
1127 DecidedByCostBenefit = true;
1128 if (*Result)
1129 return InlineResult::success();
1130 else
1131 return InlineResult::failure("Cost over threshold.");
1132 }
1133
1134 if (IgnoreThreshold)
1135 return InlineResult::success();
1136
1137 DecidedByCostThreshold = true;
1138 return Cost < std::max(1, Threshold)
1139 ? InlineResult::success()
1140 : InlineResult::failure("Cost over threshold.");

1141 }

1142

1143 bool shouldStop() override {

1144 if (IgnoreThreshold || ComputeFullInlineCost)

1145 return false;

1146

1147

1148 if (Cost < Threshold)

1149 return false;

1150 DecidedByCostThreshold = true;

1151 return true;

1152 }

1153

1154 void onLoadEliminationOpportunity() override {

1155 LoadEliminationCost += InstrCost;

1156 }

1157

1158 InlineResult onAnalysisStart() override {

1159

1160

1161

1162

1163

1164

1165

1166

1167

1168

1169 assert(NumInstructions == 0);

1170 assert(NumVectorInstructions == 0);

1171

1172

1173 updateThreshold(CandidateCall, F);

1174

1175

1176

1177

1178 assert(Threshold >= 0);

1179 assert(SingleBBBonus >= 0);

1180 assert(VectorBonus >= 0);

1181

1182

1183

1184

1185 Threshold += (SingleBBBonus + VectorBonus);

1186

1187

1188

1190

1191

1192

1193 if (F.getCallingConv() == CallingConv::Cold)
1194 Cost += InlineConstants::ColdccPenalty;
1195

1196 LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n");

1197

1198

1199 if (Cost >= Threshold && !ComputeFullInlineCost)

1201

1203 }

1204

1205public:

1206 InlineCostCallAnalyzer(

1207 Function &Callee, CallBase &Call, const InlineParams &Params,

1208 const TargetTransformInfo &TTI,

1209 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,

1210 function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,

1211 function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,

1212 ProfileSummaryInfo *PSI = nullptr,

1213 OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,

1214 bool IgnoreThreshold = false,

1215 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =

1216 nullptr)

1217 : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,

1218 ORE, GetEphValuesCache),

1220 Params.ComputeFullInlineCost || ORE ||

1221 isCostBenefitAnalysisEnabled()),

1223 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),

1224 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),

1225 Writer(this) {

1226 AllowRecursiveCall = *Params.AllowRecursiveCall;

1227 }

1228

1229

1230 InlineCostAnnotationWriter Writer;

1231

1232 void dump();

1233

1234

1235

1236 void print(raw_ostream &OS);

1237

1238 std::optional getCostDetails(const Instruction *I) {

1239 auto It = InstructionCostDetailMap.find(I);

1240 if (It != InstructionCostDetailMap.end())

1241 return It->second;

1242 return std::nullopt;

1243 }

1244

1245 ~InlineCostCallAnalyzer() override = default;

1246 int getThreshold() const { return Threshold; }

1247 int getCost() const { return Cost; }

1248 int getStaticBonusApplied() const { return StaticBonusApplied; }

1249 std::optional getCostBenefitPair() { return CostBenefit; }

1250 bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }

1251 bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }

1252};

1253

1254
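// True when the callee has local linkage and the candidate call is its only
// remaining live use, so inlining it would let the original body be removed.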

1255static bool isSoleCallToLocalFunction(const CallBase &CB,
1256 const Function &Callee) {
1257 return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
1258 &Callee == CB.getCalledFunction();
1259}

1260

1261class InlineCostFeaturesAnalyzer final : public CallAnalyzer {

1262private:

1264

1265

1266

1267

1268 static constexpr int JTCostMultiplier = 2;

1269 static constexpr int CaseClusterCostMultiplier = 2;

1270 static constexpr int SwitchDefaultDestCostMultiplier = 2;

1271 static constexpr int SwitchCostMultiplier = 2;

1272

1273

1274

1275 unsigned SROACostSavingOpportunities = 0;

1276 int VectorBonus = 0;

1277 int SingleBBBonus = 0;

1278 int Threshold = 5;

1279

1280 DenseMap<AllocaInst *, unsigned> SROACosts;

1281

1283 Cost[static_cast<size_t>(Feature)] += Delta;

1284 }

1285

1287 Cost[static_cast<size_t>(Feature)] = Value;

1288 }

1289

1290 void onDisableSROA(AllocaInst *Arg) override {

1291 auto CostIt = SROACosts.find(Arg);

1292 if (CostIt == SROACosts.end())

1293 return;

1294

1295 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);

1296 SROACostSavingOpportunities -= CostIt->second;

1297 SROACosts.erase(CostIt);

1298 }

1299

1300 void onDisableLoadElimination() override {

1301 set(InlineCostFeatureIndex::load_elimination, 1);

1302 }

1303

1304 void onCallPenalty() override {

1305 increment(InlineCostFeatureIndex::call_penalty, CallPenalty);

1306 }

1307

1308 void onCallArgumentSetup(const CallBase &Call) override {

1309 increment(InlineCostFeatureIndex::call_argument_setup,

1311 }

1312

1313 void onLoadRelativeIntrinsic() override {

1314 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost);

1315 }

1316

1317 void onLoweredCall(Function *F, CallBase &Call,

1318 bool IsIndirectCall) override {

1319 increment(InlineCostFeatureIndex::lowered_call_arg_setup,

1321

1322 if (IsIndirectCall) {

1323 InlineParams IndirectCallParams = { 0,

1324 {},

1325 {},

1326 {},

1327 {},

1328 {},

1329 {},

1330 {},

1331 true,

1332 true};

1335

1336 InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,

1337 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,

1338 false, true);

1339 if (CA.analyze().isSuccess()) {

1340 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,

1341 CA.getCost());

1342 increment(InlineCostFeatureIndex::nested_inlines, 1);

1343 }

1344 } else {

1345 onCallPenalty();

1346 }

1347 }

1348

1349 void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,

1350 bool DefaultDestUnreachable) override {

1351 if (JumpTableSize) {

1352 if (!DefaultDestUnreachable)

1353 increment(InlineCostFeatureIndex::switch_default_dest_penalty,

1354 SwitchDefaultDestCostMultiplier * InstrCost);

1355 int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +

1357 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);

1358 return;

1359 }

1360

1361 if (NumCaseCluster <= 3) {

1362 increment(InlineCostFeatureIndex::case_cluster_penalty,

1363 (NumCaseCluster - DefaultDestUnreachable) *

1364 CaseClusterCostMultiplier * InstrCost);

1365 return;

1366 }

1367

1368 int64_t ExpectedNumberOfCompare =

1369 getExpectedNumberOfCompare(NumCaseCluster);

1370

1371 int64_t SwitchCost =

1372 ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;

1373 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);

1374 }

1375

1376 void onMissedSimplification() override {

1377 increment(InlineCostFeatureIndex::unsimplified_common_instructions,

1379 }

1380

1381 void onInitializeSROAArg(AllocaInst *Arg) override {

1383 SROACosts[Arg] = SROAArgCost;

1384 SROACostSavingOpportunities += SROAArgCost;

1385 }

1386

1387 void onAggregateSROAUse(AllocaInst *Arg) override {

1388 SROACosts.find(Arg)->second += InstrCost;

1389 SROACostSavingOpportunities += InstrCost;

1390 }

1391

1392 void onBlockAnalyzed(const BasicBlock *BB) override {

1394 set(InlineCostFeatureIndex::is_multiple_blocks, 1);

1395 Threshold -= SingleBBBonus;

1396 }

1397

1398 InlineResult finalizeAnalysis() override {

1400 if (Caller->hasMinSize()) {

1401 DominatorTree DT(F);

1402 LoopInfo LI(DT);

1403 for (Loop *L : LI) {

1404

1405 if (DeadBlocks.count(L->getHeader()))

1406 continue;

1407 increment(InlineCostFeatureIndex::num_loops,

1409 }

1410 }

1411 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.size());

1412 set(InlineCostFeatureIndex::simplified_instructions,

1413 NumInstructionsSimplified);

1414 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);

1415 set(InlineCostFeatureIndex::constant_offset_ptr_args,

1416 NumConstantOffsetPtrArgs);

1417 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);

1418

1419 if (NumVectorInstructions <= NumInstructions / 10)

1420 Threshold -= VectorBonus;

1421 else if (NumVectorInstructions <= NumInstructions / 2)

1422 Threshold -= VectorBonus / 2;

1423

1424 set(InlineCostFeatureIndex::threshold, Threshold);

1425

1427 }

1428

1429 bool shouldStop() override { return false; }

1430

1431 void onLoadEliminationOpportunity() override {

1432 increment(InlineCostFeatureIndex::load_elimination, 1);

1433 }

1434

1435 InlineResult onAnalysisStart() override {

1436 increment(InlineCostFeatureIndex::callsite_cost,

1438

1439 set(InlineCostFeatureIndex::cold_cc_penalty,

1440 (F.getCallingConv() == CallingConv::Cold));

1441

1442 set(InlineCostFeatureIndex::last_call_to_static_bonus,

1443 isSoleCallToLocalFunction(CandidateCall, F));

1444

1445

1446

1447

1448 int SingleBBBonusPercent = 50;

1452 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;

1453 VectorBonus = Threshold * VectorBonusPercent / 100;

1454 Threshold += (SingleBBBonus + VectorBonus);

1455

1457 }

1458

1459public:

1460 InlineCostFeaturesAnalyzer(

1461 const TargetTransformInfo &TTI,

1462 function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,

1463 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,

1464 function_ref<const TargetLibraryInfo &(Function &)> GetTLI,

1465 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,

1466 CallBase &Call)

1467 : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI,

1468 PSI) {}

1469

1471};

1472

1473}

1474

1475

1476bool CallAnalyzer::isAllocaDerivedArg(Value *V) {

1477 return SROAArgValues.count(V);

1478}

1479

1480void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {

1481 onDisableSROA(SROAArg);

1482 EnabledSROAAllocas.erase(SROAArg);

1483 disableLoadElimination();

1484}

1485

1486void InlineCostAnnotationWriter::emitInstructionAnnot(

1487 const Instruction *I, formatted_raw_ostream &OS) {

1488

1489

1490

1491 std::optional Record = ICCA->getCostDetails(I);

1492 if (!Record)

1493 OS << "; No analysis for the instruction";

1494 else {

1495 OS << "; cost before = " << Record->CostBefore

1496 << ", cost after = " << Record->CostAfter

1497 << ", threshold before = " << Record->ThresholdBefore

1498 << ", threshold after = " << Record->ThresholdAfter << ", ";

1499 OS << "cost delta = " << Record->getCostDelta();

1500 if (Record->hasThresholdChanged())

1501 OS << ", threshold delta = " << Record->getThresholdDelta();

1502 }

1503 auto *V = ICCA->getSimplifiedValueUnchecked(const_cast<Instruction *>(I));

1504 if (V) {

1505 OS << ", simplified to ";

1506 V->print(OS, true);

1508 if (VI->getFunction() != I->getFunction())

1509 OS << " (caller instruction)";

1511 if (VArg->getParent() != I->getFunction())

1512 OS << " (caller argument)";

1513 }

1514 }

1515 OS << "\n";

1516}

1517

1518

1519void CallAnalyzer::disableSROA(Value *V) {

1520 if (auto *SROAArg = getSROAArgForValueOrNull(V)) {

1521 disableSROAForArg(SROAArg);

1522 }

1523}

1524

1525void CallAnalyzer::disableLoadElimination() {

1526 if (EnableLoadElimination) {

1527 onDisableLoadElimination();

1528 EnableLoadElimination = false;

1529 }

1530}

1531

1532

1533

1534

1535
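// Fold the GEP's indices into Offset when every index is (or simplifies to) a
// constant; returns false as soon as a non-constant index is seen.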

1536bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {

1537 unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());

1538 assert(IntPtrWidth == Offset.getBitWidth());

1539

1541 GTI != GTE; ++GTI) {

1542 ConstantInt *OpC =

1543 getDirectOrSimplifiedValue(GTI.getOperand());

1544 if (!OpC)

1545 return false;

1547 continue;

1548

1549

1550 if (StructType *STy = GTI.getStructTypeOrNull()) {

1552 const StructLayout *SL = DL.getStructLayout(STy);

1554 continue;

1555 }

1556

1557 APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));

1559 }

1560 return true;

1561}

1562

1563

1564

1565

1566bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {

1569 for (const Use &Op : GEP.indices())

1570 if (Constant *SimpleOp = getSimplifiedValue(Op))

1572 else

1577}

1578

1579bool CallAnalyzer::visitAlloca(AllocaInst &I) {

1580 disableSROA(I.getOperand(0));

1581

1582

1583

1584 if (I.isArrayAllocation()) {

1585 Constant *Size = getSimplifiedValue(I.getArraySize());

1587

1588

1589

1590

1591

1592

1593

1594

1595 Type *Ty = I.getAllocatedType();

1597 AllocSize->getLimitedValue(),

1598 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);

1600 HasDynamicAlloca = true;

1601 return false;

1602 }

1603 }

1604

1605

1606 if (I.isStaticAlloca()) {

1607 Type *Ty = I.getAllocatedType();

1608 AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinValue(),

1609 AllocatedSize);

1610 }

1611

1612

1613

1614

1615

1616 if (!I.isStaticAlloca())

1617 HasDynamicAlloca = true;

1618

1619 return false;

1620}

1621

1622bool CallAnalyzer::visitPHI(PHINode &I) {

1623

1624

1625

1626

1627

1628

1629

1630

1631

1632

1633 APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));

1634 bool CheckSROA = I.getType()->isPointerTy();

1635

1636

1638 std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};

1639 Value *FirstV = nullptr;

1640

1641 for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {

1642 BasicBlock *Pred = I.getIncomingBlock(i);

1643

1644 if (DeadBlocks.count(Pred))

1645 continue;

1646

1647

1648 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];

1649 if (KnownSuccessor && KnownSuccessor != I.getParent())

1650 continue;

1651

1652 Value *V = I.getIncomingValue(i);

1653

1654 if (&I == V)

1655 continue;

1656

1657 Constant *C = getDirectOrSimplifiedValue(V);

1658

1659 std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};

1660 if (!C && CheckSROA)

1661 BaseAndOffset = ConstantOffsetPtrs.lookup(V);

1662

1663 if (!C && !BaseAndOffset.first)

1664

1665

1666 return true;

1667

1668 if (FirstC) {

1669 if (FirstC == C)

1670

1671

1672 continue;

1673

1674

1675 return true;

1676 }

1677

1678 if (FirstV) {

1679

1680 if (FirstBaseAndOffset == BaseAndOffset)

1681 continue;

1682 return true;

1683 }

1684

1685 if (C) {

1686

1687 FirstC = C;

1688 continue;

1689 }

1690

1691

1692

1693 FirstV = V;

1694 FirstBaseAndOffset = BaseAndOffset;

1695 }

1696

1697

1698 if (FirstC) {

1699 SimplifiedValues[&I] = FirstC;

1700 return true;

1701 }

1702

1703

1704 if (FirstBaseAndOffset.first) {

1705 ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

1706

1707 if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))

1708 SROAArgValues[&I] = SROAArg;

1709 }

1710

1711 return true;

1712}

1713

1714

1715

1716

1717

1718bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {

1719

1720 std::pair<Value *, APInt> BaseAndOffset =

1721 ConstantOffsetPtrs.lookup(I.getPointerOperand());

1722 if (!BaseAndOffset.first)

1723 return false;

1724

1725

1726

1727 if (!accumulateGEPOffset(cast(I), BaseAndOffset.second))

1728 return false;

1729

1730

1731 ConstantOffsetPtrs[&I] = BaseAndOffset;

1732

1733 return true;

1734}

1735

1736bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {

1737 auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());

1738

1739

1740 auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {

1741 for (const Use &Op : GEP.indices())

1742 if (!getDirectOrSimplifiedValue(Op))

1743 return false;

1744 return true;

1745 };

1746

1749 return true;

1750

1751 if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {

1752 if (SROAArg)

1753 SROAArgValues[&I] = SROAArg;

1754

1755

1756 return true;

1757 }

1758

1759

1760 if (SROAArg)

1761 disableSROAForArg(SROAArg);

1762 return isGEPFree(I);

1763}

1764

1765

1766

1767

1768bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {

1769

1771 return false;

1772 auto *CmpOp = Cmp.getOperand(0);

1773

1774 if (CandidateCall.getCaller() != &F)

1775 return false;

1776

1777 auto *CallBB = CandidateCall.getParent();

1778 auto *Predecessor = CallBB->getSinglePredecessor();

1779 if (!Predecessor)

1780 return false;

1781

1783 if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)

1784 return false;

1785

1786

1787

1788 bool ArgFound = false;

1789 Value *FuncArg = nullptr, *CallArg = nullptr;

1790 for (unsigned ArgNum = 0;

1791 ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) {

1792 FuncArg = F.getArg(ArgNum);
1793 CallArg = CandidateCall.getArgOperand(ArgNum);
1794 if (FuncArg == CmpOp && CallArg != CmpOp) {

1795 ArgFound = true;

1796 break;

1797 }

1798 }

1799 if (!ArgFound)

1800 return false;

1801

1802

1803

1805 CondContext CC(&Cmp);

1806 CC.Invert = (CallBB != Br->getSuccessor(0));

1807 SQ.CC = &CC;

1808 CC.AffectedValues.insert(FuncArg);

1812

1813

1814 if ((ConstVal->isOne() && CC.Invert) ||

1815 (ConstVal->isZero() && !CC.Invert)) {

1816 SimplifiedValues[&Cmp] = ConstVal;

1817 return true;

1818 }

1819 }

1820 return false;

1821}

1822

1823

1824bool CallAnalyzer::simplifyInstruction(Instruction &I) {
1825 SmallVector<Constant *> COps;
1826 for (Value *Op : I.operands()) {
1827 Constant *COp = getDirectOrSimplifiedValue<Constant>(Op);
1828 if (!COp)
1829 return false;
1830 COps.push_back(COp);
1831 }
1832 auto *C = ConstantFoldInstOperands(&I, COps, DL);
1833 if (!C)
1834 return false;
1835 SimplifiedValues[&I] = C;
1836 return true;
1837}

1838

1839

1840

1841

1842

1843

1844

1845

1846

1847

1848bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {

1850 auto *C = getDirectOrSimplifiedValue(Arg);

1851

1853 SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);

1854 return true;

1855}

1856

1857bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {

1858

1859

1861 return false;

1862

1864 true);

1866 if (C)

1867 SimplifiedValues[&CB] = C;

1868 return C;

1869}

1870

1871bool CallAnalyzer::visitBitCast(BitCastInst &I) {

1872

1874 return true;

1875

1876

1877 std::pair<Value *, APInt> BaseAndOffset =

1878 ConstantOffsetPtrs.lookup(I.getOperand(0));

1879

1880 if (BaseAndOffset.first)

1881 ConstantOffsetPtrs[&I] = BaseAndOffset;

1882

1883

1884 if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))

1885 SROAArgValues[&I] = SROAArg;

1886

1887

1888 return true;

1889}

1890

1891bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {

1892

1894 return true;

1895

1896

1897

1899 unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();

1900 if (IntegerSize == DL.getPointerSizeInBits(AS)) {

1901 std::pair<Value *, APInt> BaseAndOffset =

1902 ConstantOffsetPtrs.lookup(I.getOperand(0));

1903 if (BaseAndOffset.first)

1904 ConstantOffsetPtrs[&I] = BaseAndOffset;

1905 }

1906

1907

1908

1909

1910

1911

1912

1913

1914 if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))

1915 SROAArgValues[&I] = SROAArg;

1916

1919}

1920

1921bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {

1922

1924 return true;

1925

1926

1927

1928 Value *Op = I.getOperand(0);

1929 unsigned IntegerSize = Op->getType()->getScalarSizeInBits();

1930 if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {

1931 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);

1932 if (BaseAndOffset.first)

1933 ConstantOffsetPtrs[&I] = BaseAndOffset;

1934 }

1935

1936

1937 if (auto *SROAArg = getSROAArgForValueOrNull(Op))

1938 SROAArgValues[&I] = SROAArg;

1939

1942}

1943

1944bool CallAnalyzer::visitCastInst(CastInst &I) {

1945

1947 return true;

1948

1949

1950

1951 disableSROA(I.getOperand(0));

1952

1953

1954

1955

1956 switch (I.getOpcode()) {

1957 case Instruction::FPTrunc:

1958 case Instruction::FPExt:

1959 case Instruction::UIToFP:

1960 case Instruction::SIToFP:

1961 case Instruction::FPToUI:

1962 case Instruction::FPToSI:

1964 onCallPenalty();

1965 break;

1966 default:

1967 break;

1968 }

1969

1972}

1973

1974bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {

1975 return CandidateCall.paramHasAttr(A->getArgNo(), Attr);

1976}

1977

1978bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {

1979

1980

1981

1982

1983

1985 if (paramHasAttr(A, Attribute::NonNull))

1986 return true;

1987

1988

1989

1990

1991 if (isAllocaDerivedArg(V))

1992

1993

1994

1995 return true;

1996

1997 return false;

1998}

1999

2000bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {

2001

2002

2003

2004

2005

2006

2007

2008

2009

2010

2011

2012

2013

2014

2015

2018 return false;

2020 return false;

2021

2022 return true;

2023}

2024

2025bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,

2026 BlockFrequencyInfo *CallerBFI) {

2027

2028

2029 if (PSI && PSI->hasProfileSummary())

2030 return PSI->isColdCallSite(Call, CallerBFI);

2031

2032

2033 if (!CallerBFI)

2034 return false;

2035

2036

2037

2038

2039

2042 auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);

2043 auto CallerEntryFreq =

2045 return CallSiteFreq < CallerEntryFreq * ColdProb;

2046}

2047

2048std::optional

2049InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,

2050 BlockFrequencyInfo *CallerBFI) {

2051

2052

2053

2054 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))

2056

2057

2058

2060 return std::nullopt;

2061

2062

2063

2064

2065

2067 BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);

2068 BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq();

2070 if (Limit && CallSiteFreq >= *Limit)

2072

2073

2074 return std::nullopt;

2075}

2076

2077void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {

2078

2079 if (!allowSizeGrowth(Call)) {

2080 Threshold = 0;

2081 return;

2082 }

2083

2085

2086

2087 auto MinIfValid = [](int A, std::optional B) {

2088 return B ? std::min(A, *B) : A;

2089 };

2090

2091

2092 auto MaxIfValid = [](int A, std::optional B) {

2093 return B ? std::max(A, *B) : A;

2094 };

2095

2096

2097

2098

2099

2100

2101

2102

2103

2104

2105

2106

2107

2108 int SingleBBBonusPercent = 50;

2111

2112

2113 auto DisallowAllBonuses = [&]() {

2114 SingleBBBonusPercent = 0;

2115 VectorBonusPercent = 0;

2116 LastCallToStaticBonus = 0;

2117 };

2118

2119

2120

2121 if (Caller->hasMinSize()) {

2123

2124

2125

2126

2127 SingleBBBonusPercent = 0;

2128 VectorBonusPercent = 0;

2129 } else if (Caller->hasOptSize())

2131

2132

2133

2134 if (Caller->hasMinSize()) {

2135 if (Callee.hasFnAttribute(Attribute::InlineHint))

2136 Threshold = MaxIfValid(Threshold, Params.HintThreshold);

2137

2138

2139

2140

2141

2142

2143

2144

2145 BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;

2149

2150

2151

2152

2156

2157

2158

2159

2160 DisallowAllBonuses();

2162 } else if (PSI) {

2163

2164

2165 if (PSI->isFunctionEntryHot(&Callee)) {

2167

2168

2169

2170 Threshold = MaxIfValid(Threshold, Params.HintThreshold);

2171 } else if (PSI->isFunctionEntryCold(&Callee)) {

2173

2174

2175

2176

2177 DisallowAllBonuses();

2178 Threshold = MinIfValid(Threshold, Params.ColdThreshold);

2179 }

2180 }

2181 }

2182

2184

2185

2186

2188

2189 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;

2190 VectorBonus = Threshold * VectorBonusPercent / 100;

2191

2192

2193

2194

2195 if (isSoleCallToLocalFunction(Call, F)) {

2196 addCost(-LastCallToStaticBonus);

2197 StaticBonusApplied = LastCallToStaticBonus;

2198 }

2199}

2200

2201bool CallAnalyzer::visitCmpInst(CmpInst &I) {

2202 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

2203

2205 return true;

2206

2207

2208 if (simplifyCmpInstForRecCall(I))

2209 return true;

2210

2211 if (I.getOpcode() == Instruction::FCmp)

2212 return false;

2213

2214

2215

2216 Value *LHSBase, *RHSBase;

2217 APInt LHSOffset, RHSOffset;

2218 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);

2219 if (LHSBase) {

2220 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);

2221 if (RHSBase && LHSBase == RHSBase) {

2222

2223

2225 I.getType(),

2227 ++NumConstantPtrCmps;

2228 return true;

2229 }

2230 }

2231

2232 auto isImplicitNullCheckCmp = [](const CmpInst &I) {

2233 for (auto *User : I.users())

2235 if (Instr->getMetadata(LLVMContext::MD_make_implicit))

2236 return false;

2237 return true;

2238 };

2239

2240

2241

2243 if (isKnownNonNullInCallee(I.getOperand(0))) {

2247 return true;

2248 }

2249

2250

2251 if (isImplicitNullCheckCmp(I))

2252 return true;

2253 }

2255}

2256

2257bool CallAnalyzer::visitSub(BinaryOperator &I) {

2258

2259

2260 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

2261 Value *LHSBase, *RHSBase;

2262 APInt LHSOffset, RHSOffset;

2263 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);

2264 if (LHSBase) {

2265 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);

2266 if (RHSBase && LHSBase == RHSBase) {

2267

2268

2272 SimplifiedValues[&I] = C;

2273 ++NumConstantPtrDiffs;

2274 return true;

2275 }

2276 }

2277 }

2278

2279

2280

2281 return Base::visitSub(I);

2282}

2283

2284bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {

2285 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

2286 Constant *CLHS = getDirectOrSimplifiedValue(LHS);

2287 Constant *CRHS = getDirectOrSimplifiedValue(RHS);

2288

2289 Value *SimpleV = nullptr;

2291 SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,

2292 FI->getFastMathFlags(), DL);

2293 else

2294 SimpleV =

2296

2298 SimplifiedValues[&I] = C;

2299

2300 if (SimpleV)

2301 return true;

2302

2303

2304 disableSROA(LHS);

2305 disableSROA(RHS);

2306

2307

2308

2309

2310 using namespace llvm::PatternMatch;

2311 if (I.getType()->isFloatingPointTy() &&

2314 onCallPenalty();

2315

2316 return false;

2317}

2318

2319bool CallAnalyzer::visitFNeg(UnaryOperator &I) {

2320 Value *Op = I.getOperand(0);

2321 Constant *COp = getDirectOrSimplifiedValue(Op);

2322

2325

2327 SimplifiedValues[&I] = C;

2328

2329 if (SimpleV)

2330 return true;

2331

2332

2333 disableSROA(Op);

2334

2335 return false;

2336}

2337

2338bool CallAnalyzer::visitLoad(LoadInst &I) {

2339 if (handleSROA(I.getPointerOperand(), I.isSimple()))

2340 return true;

2341

2342

2343

2344

2345 if (EnableLoadElimination &&

2346 !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {

2347 onLoadEliminationOpportunity();

2348 return true;

2349 }

2350

2351 onMemAccess();

2352 return false;

2353}

2354

2355bool CallAnalyzer::visitStore(StoreInst &I) {

2356 if (handleSROA(I.getPointerOperand(), I.isSimple()))

2357 return true;

2358

2359

2360

2361

2362

2363

2364

2365

2366

2367 disableLoadElimination();

2368

2369 onMemAccess();

2370 return false;

2371}

2372

2373bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {

2374 Value *Op = I.getAggregateOperand();

2375

2376

2377

2378 if (Value *SimpleOp = getSimplifiedValueUnchecked(Op)) {

2379 SimplifyQuery SQ(DL);

2381 if (SimpleV) {

2382 SimplifiedValues[&I] = SimpleV;

2383 return true;

2384 }

2385 }

2386

2387

2388 return Base::visitExtractValue(I);

2389}

2390

2391bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {

2392

2394 return true;

2395

2396

2397 return Base::visitInsertValue(I);

2398}

2399

2400

2401

2402

2403

2404

2405

2406bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {

2407

2408

2409

2410

2412 return false;

2413

2414

2418 Constant *C = getDirectOrSimplifiedValue(I);

2419 if (!C)

2420 return false;

2421

2423 }

2425 SimplifiedValues[&Call] = C;

2426 return true;

2427 }

2428

2429 return false;

2430}

2431

2432bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {

2433 const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;

2434 LibFunc LF;

2435 if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))

2437

2438 switch (LF) {

2439 case LibFunc_memcpy_chk:

2440 case LibFunc_memmove_chk:

2441 case LibFunc_mempcpy_chk:

2442 case LibFunc_memset_chk: {

2443

2444

2445

2446

2447

2448

2449 auto *LenOp = getDirectOrSimplifiedValue(Call.getOperand(2));

2450 auto *ObjSizeOp =

2451 getDirectOrSimplifiedValue(Call.getOperand(3));

2452 if (LenOp && ObjSizeOp &&

2453 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {

2454 return false;

2455 }

2456 break;

2457 }

2458 default:

2459 break;

2460 }

2461

2463}

2464

2465bool CallAnalyzer::visitCallBase(CallBase &Call) {

2466 if (!onCallBaseVisitStart(Call))

2467 return true;

2468

2470 F.hasFnAttribute(Attribute::ReturnsTwice)) {

2471

2472 ExposesReturnsTwice = true;

2473 return false;

2474 }

2476 ContainsNoDuplicateCall = true;

2477

2479 onInlineAsm(*InlineAsmOp);

2480

2482 bool IsIndirectCall = !F;

2483 if (IsIndirectCall) {

2484

2485

2487 F = getSimplifiedValue(Callee);

2489 onCallArgumentSetup(Call);

2490

2492 disableLoadElimination();

2493 return Base::visitCallBase(Call);

2494 }

2495 }

2496

2497 assert(F && "Expected a call to a known function");

2498

2499

2500 if (simplifyCallSite(F, Call))

2501 return true;

2502

2503

2504

2506 switch (II->getIntrinsicID()) {

2507 default:

2509 disableLoadElimination();

2510 return Base::visitCallBase(Call);

2511

2512 case Intrinsic::load_relative:

2513 onLoadRelativeIntrinsic();

2514 return false;

2515

2516 case Intrinsic::memset:

2517 case Intrinsic::memcpy:

2518 case Intrinsic::memmove:

2519 disableLoadElimination();

2520

2521 return false;

2522 case Intrinsic::icall_branch_funnel:

2523 case Intrinsic::localescape:

2524 HasUninlineableIntrinsic = true;

2525 return false;

2526 case Intrinsic::vastart:

2527 InitsVargArgs = true;

2528 return false;

2529 case Intrinsic::launder_invariant_group:

2530 case Intrinsic::strip_invariant_group:

2531 if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))

2532 SROAArgValues[II] = SROAArg;

2533 return true;

2534 case Intrinsic::is_constant:

2535 return simplifyIntrinsicCallIsConstant(Call);

2536 case Intrinsic::objectsize:

2537 return simplifyIntrinsicCallObjectSize(Call);

2538 }

2539 }

2540

2542

2543

2544 IsRecursiveCall = true;

2545 if (!AllowRecursiveCall)

2546 return false;

2547 }

2548

2549 if (isLoweredToCall(F, Call)) {

2550 onLoweredCall(F, Call, IsIndirectCall);

2551 }

2552

2554 disableLoadElimination();

2555 return Base::visitCallBase(Call);

2556}

2557

2558bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {

2559

2560 bool Free = !HasReturn;

2561 HasReturn = true;

2562 return Free;

2563}

2564

2565bool CallAnalyzer::visitBranchInst(BranchInst &BI) {

2566

2567

2568

2569

2571 getDirectOrSimplifiedValue(BI.getCondition()) ||

2572 BI.getMetadata(LLVMContext::MD_make_implicit);

2573}

2574

2575bool CallAnalyzer::visitSelectInst(SelectInst &SI) {

2576 bool CheckSROA = SI.getType()->isPointerTy();

2579

2580 Constant *TrueC = getDirectOrSimplifiedValue(TrueVal);

2581 Constant *FalseC = getDirectOrSimplifiedValue(FalseVal);

2582 Constant *CondC = getSimplifiedValue(SI.getCondition());

2583

2584 if (!CondC) {

2585

2586 if (TrueC == FalseC && TrueC) {

2587 SimplifiedValues[&SI] = TrueC;

2588 return true;

2589 }

2590

2591 if (!CheckSROA)

2592 return Base::visitSelectInst(SI);

2593

2594 std::pair<Value *, APInt> TrueBaseAndOffset =

2595 ConstantOffsetPtrs.lookup(TrueVal);

2596 std::pair<Value *, APInt> FalseBaseAndOffset =

2597 ConstantOffsetPtrs.lookup(FalseVal);

2598 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {

2599 ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

2600

2601 if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))

2602 SROAArgValues[&SI] = SROAArg;

2603 return true;

2604 }

2605

2606 return Base::visitSelectInst(SI);

2607 }

2608

2609

2612 : nullptr;

2613 if (!SelectedV) {

2614

2615

2616

2617 if (TrueC && FalseC) {

2619 SimplifiedValues[&SI] = C;

2620 return true;

2621 }

2622 }

2623 return Base::visitSelectInst(SI);

2624 }

2625

2626

2628 SimplifiedValues[&SI] = SelectedC;

2629 return true;

2630 }

2631

2632 if (!CheckSROA)

2633 return true;

2634

2635 std::pair<Value *, APInt> BaseAndOffset =

2636 ConstantOffsetPtrs.lookup(SelectedV);

2637 if (BaseAndOffset.first) {

2638 ConstantOffsetPtrs[&SI] = BaseAndOffset;

2639

2640 if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))

2641 SROAArgValues[&SI] = SROAArg;

2642 }

2643

2644 return true;

2645}

2646

2647bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {

2648

2649

2650 if (getDirectOrSimplifiedValue(SI.getCondition()))

2651 return true;

2652

2653

2654

2655

2656

2657

2658

2659

2660

2661

2662

2663

2664

2665 unsigned JumpTableSize = 0;

2666 BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;

2667 unsigned NumCaseCluster =

2669

2670 onFinalizeSwitch(JumpTableSize, NumCaseCluster, SI.defaultDestUnreachable());

2671 return false;

2672}

2673

2674bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {

2675

2676

2677

2678

2679

2680

2681

2682

2683 HasIndirectBr = true;

2684 return false;

2685}

2686

2687bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {

2688

2689

2690 return false;

2691}

2692

2693bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {

2694

2695

2696 return false;

2697}

2698

2699bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {

2700

2701

2702 return false;

2703}

2704

2705bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {

2706

2707

2708

2709 return true;

2710}

2711

2712bool CallAnalyzer::visitInstruction(Instruction &I) {

2713

2714

2717 return true;

2718

2719

2720

2721 for (const Use &Op : I.operands())

2722 disableSROA(Op);

2723

2724 return false;

2725}

2726

2727

2728

2729

2730

2731

2732

2733

2734InlineResult

2735CallAnalyzer::analyzeBlock(BasicBlock *BB,

2736 const SmallPtrSetImpl<const Value *> &EphValues) {

2737 for (Instruction &I : *BB) {

2738

2739

2740

2741

2742

2743

2744

2745

2746 if (I.isDebugOrPseudoInst())

2747 continue;

2748

2749

2750 if (EphValues.count(&I))

2751 continue;

2752

2753 ++NumInstructions;

2755 ++NumVectorInstructions;

2756

2757

2758

2759

2760

2761

2762 onInstructionAnalysisStart(&I);

2763

2764 if (Base::visit(&I))

2765 ++NumInstructionsSimplified;

2766 else

2767 onMissedSimplification();

2768

2769 onInstructionAnalysisFinish(&I);

2770 using namespace ore;

2771

2773 if (IsRecursiveCall && !AllowRecursiveCall)

2775 else if (ExposesReturnsTwice)

2777 else if (HasDynamicAlloca)

2779 else if (HasIndirectBr)

2781 else if (HasUninlineableIntrinsic)

2783 else if (InitsVargArgs)

2785 if (IR.isSuccess()) {

2786 if (ORE)

2787 ORE->emit([&]() {

2788 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",

2789 &CandidateCall)

2790 << NV("Callee", &F) << " has uninlinable pattern ("

2791 << NV("InlineResult", IR.getFailureReason())

2792 << ") and cost is not fully computed";

2793 });

2794 return IR;

2795 }

2796

2797

2798

2799

2801 auto IR =

2803 if (ORE)

2804 ORE->emit([&]() {

2805 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",

2806 &CandidateCall)

2807 << NV("Callee", &F) << " is "

2808 << NV("InlineResult", IR.getFailureReason())

2809 << ". Cost is not fully computed";

2810 });

2811 return IR;

2812 }

2813

2814 if (shouldStop())

2816 "Call site analysis is not favorable to inlining.");

2817 }

2818

2820}


/// Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns null if V is not a pointer, and returns the constant '0' if there
/// are no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  unsigned AS = V->getType()->getPointerAddressSpace();
  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
  APInt Offset = APInt::getZero(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IdxPtrTy = DL.getIndexType(V->getType());
  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
}


/// Find dead blocks due to deleted CFG edges during inlining.
///
/// If we know the successor of the current block, CurrBB, has to be the
/// NextBB, the other successors of CurrBB are dead if these successors have
/// no live incoming CFG edges. If one block is found to be dead, we can
/// continue growing the dead block list by checking the successors of the
/// dead blocks to see if all their incoming edges are dead or not.
void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
    // A CFG edge is dead if the predecessor is dead or the predecessor has a
    // known successor which is not the one under exam.
    if (DeadBlocks.count(Pred))
      return true;
    BasicBlock *KnownSucc = KnownSuccessors[Pred];
    return KnownSucc && KnownSucc != Succ;
  };

  auto IsNewlyDead = [&](BasicBlock *BB) {
    // If all the edges to a block are dead, the block is also dead.
    return (!DeadBlocks.count(BB) &&
            llvm::all_of(predecessors(BB),
                         [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
  };

  for (BasicBlock *Succ : successors(CurrBB)) {
    if (Succ == NextBB || !IsNewlyDead(Succ))
      continue;
    SmallVector<BasicBlock *, 4> NewDead;
    NewDead.push_back(Succ);
    while (!NewDead.empty()) {
      BasicBlock *Dead = NewDead.pop_back_val();
      if (DeadBlocks.insert(Dead).second)
        // Continue growing the dead block lists.
        for (BasicBlock *S : successors(Dead))
          if (IsNewlyDead(S))
            NewDead.push_back(S);
    }
  }
}
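
// Worked example (illustrative, not from the original file): given a diamond
//
//   entry -> { then, else },  then -> merge,  else -> merge
//
// where the entry branch condition folds to 'true' for this call site, the
// analysis records KnownSuccessors[entry] = then and calls
// findDeadBlocks(entry, then). 'else' becomes dead because its only incoming
// edge (entry -> else) is dead, while 'merge' stays live through 'then', so
// only the instructions in 'else' are excluded from the cost.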


/// Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
InlineResult CallAnalyzer::analyze() {
  ++NumCallsAnalyzed;

  auto Result = onAnalysisStart();
  if (!Result.isSuccess())
    return Result;

  if (F.empty())
    return InlineResult::success();

  Function *Caller = CandidateCall.getFunction();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallBase *Call = dyn_cast<CallBase>(U);
    if (Call && Call->getFunction() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  auto CAI = CandidateCall.arg_begin();
  for (Argument &FAI : F.args()) {
    if (isa<Constant>(*CAI)) {
      SimplifiedValues[&FAI] = *CAI;
      ++NumConstantArgs;
    }

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
        SROAArgValues[&FAI] = SROAArg;
        onInitializeSROAArg(SROAArg);
        EnabledSROAAllocas.insert(SROAArg);
      }
    }
    ++CAI;
  }
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // Collect the ephemeral values of F, reusing the caller-provided cache when
  // one is available to avoid recomputing them.
  SmallPtrSet<const Value *, 32> EphValuesStorage;
  const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
  if (GetEphValuesCache)
    EphValues = &GetEphValuesCache(F).ephValues();
  else
    CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F),
                                        EphValuesStorage);

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, prioritizing for small iterations because we exit after
  // crossing our threshold, we use a small-size optimized SetVector.
  typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());

  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    if (shouldStop())
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    onBlockStart(BB);

    // Disallow inlining a blockaddress with uses other than strictly callbr.
    // A blockaddress only has defined behavior for an indirect branch in the
    // same function, and we do not currently support inlining indirect
    // branches. But, the inliner may not see an indirect branch that ends up
    // being dead code at a particular call site. If the blockaddress escapes
    // the function, e.g., via a global variable, inlining may lead to an
    // invalid cross-function reference.
    // FIXME: pr/39560: continue relaxing this overt restriction.
    if (BB->hasAddressTaken())
      for (User *U : BlockAddress::get(&*BB)->users())
        if (!isa<CallBrInst>(*U))
          return InlineResult::failure("blockaddress used outside of callbr");

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail on out.
    InlineResult IR = analyzeBlock(BB, *EphValues);
    if (!IR.isSuccess())
      return IR;

    Instruction *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have terminator
    // that may be simplified based on the values simplified by this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
          BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
          BBWorklist.insert(NextBB);
          KnownSuccessors[BB] = NextBB;
          findDeadBlocks(BB, NextBB);
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
        BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
        BBWorklist.insert(NextBB);
        KnownSuccessors[BB] = NextBB;
        findDeadBlocks(BB, NextBB);
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    BBWorklist.insert_range(successors(BB));

    onBlockAnalyzed(BB);
  }

  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
    return InlineResult::failure("noduplicate");

  // If the callee's stack size exceeds the specified limit, do not inline it.
  // The command line option overrides a limit set in the function attributes.
  size_t FinalStackSizeThreshold = StackSizeThreshold;
  if (!StackSizeThreshold.getNumOccurrences())
    if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
            Caller, InlineConstants::MaxInlineStackSizeAttributeName))
      FinalStackSizeThreshold = *AttrMaxStackSize;
  if (AllocatedSize > FinalStackSizeThreshold)
    return InlineResult::failure("stacksize");

  return finalizeAnalysis();
}


void InlineCostCallAnalyzer::print(raw_ostream &OS) {
#define DEBUG_PRINT_STAT(x) OS << "      " #x ": " << x << "\n"
  if (PrintInstructionComments)
    F.print(OS, &Writer);
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(NumInstructions);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(LoadEliminationCost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
#endif


/// Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(
    Function *Caller, Function *Callee, TargetTransformInfo &TTI,
    function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
  // Note that CalleeTLI must be a copy not a reference. The legacy pass
  // manager caches the most recently created TLI in the
  // TargetLibraryInfoWrapperPass object and always returns a reference to that
  // object, whereas the new pass manager creates a new TLI object and stores
  // it in the FunctionAnalysisManager.
  auto CalleeTLI = GetTLI(*Callee);
  return (IgnoreTTIInlineCompatible ||
          TTI.areInlineCompatible(Caller, Callee)) &&
         GetTLI(*Caller).areInlineCompatible(CalleeTLI,
                                             InlineCallerSupersetNoBuiltin) &&
         AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}


int llvm::getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
                          const DataLayout &DL) {
  int64_t Cost = 0;
  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
    if (Call.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
      unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
      unsigned AS = PTy->getAddressSpace();
      unsigned PointerSize = DL.getPointerSizeInBits(AS);
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost += 2 * NumStores * InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost += InstrCost;
    }
  }
  // The call instruction also disappears after inlining.
  Cost += InstrCost;
  Cost += TTI.getInlineCallPenalty(Call.getCaller(), Call, CallPenalty);

  return std::min<int64_t>(Cost, INT_MAX);
}
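
// A worked example of the accounting above (illustrative sketch only, not part
// of the LLVM sources): with the default -inline-instr-cost=5 and
// -inline-call-penalty=25, a call passing one 64-byte byval aggregate and one
// ordinary argument on a target with 64-bit pointers is charged roughly:
//
//   NumStores = min((512 + 63) / 64, 8) = 8   // byval copy, capped at 8
//   Cost      = 2 * 8 * 5                     //  80 for the byval copy
//             + 1 * 5                         // + 5 for the other argument
//             + 5 + 25                        // + call instruction + penalty
//             = 115
//
// assuming TTI.getInlineCallPenalty returns the default CallPenalty.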


InlineCost llvm::getInlineCost(
    CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
    function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {
  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
                       GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
                       GetEphValuesCache);
}


std::optional<int> llvm::getInliningCostEstimate(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  const InlineParams Params = {/* DefaultThreshold */ 0,
                               /* HintThreshold */ {},
                               /* ColdThreshold */ {},
                               /* OptSizeThreshold */ {},
                               /* OptMinSizeThreshold */ {},
                               /* HotCallSiteThreshold */ {},
                               /* LocallyHotCallSiteThreshold */ {},
                               /* ColdCallSiteThreshold */ {},
                               /* ComputeFullInlineCost */ true,
                               /* EnableDeferral */ true};

  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
                            GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true,
                            /* IgnoreThreshold */ true);
  auto R = CA.analyze();
  if (!R.isSuccess())
    return std::nullopt;
  return CA.getCost();
}
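
// Hypothetical caller sketch (FAM and CB are assumed names, not part of this
// file): clients such as the ML-based inline advisors ask for the
// threshold-free cost estimate like so, passing whichever analysis getters
// they have available; the optional is empty when the analysis bails out.
//
//   auto GetAC = [&](Function &F) -> AssumptionCache & {
//     return FAM.getResult<AssumptionAnalysis>(F);
//   };
//   if (std::optional<int> Cost =
//           llvm::getInliningCostEstimate(CB, CalleeTTI, GetAC))
//     dbgs() << "estimated inlining cost: " << *Cost << "\n";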


std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
                                 PSI, ORE, *Call.getCalledFunction(), Call);
  auto R = CFA.analyze();
  if (!R.isSuccess())
    return std::nullopt;
  return CFA.features();
}


std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
    CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {

  // Cannot inline indirect calls.
  if (!Callee)
    return InlineResult::failure("indirect call");

  // When callee coroutine function is inlined into caller coroutine function
  // before coro-split pass,
  // coro-early pass can not handle this quiet well.
  // So we won't inline the coroutine function if it have not been unsplited
  if (Callee->isPresplitCoroutine())
    return InlineResult::failure("unsplited coroutine call");

  // Never inline calls with byval arguments that does not have the alloca
  // address space. Since byval arguments can be replaced with a copy to an
  // alloca, the inlined code would need to be adjusted to handle that the
  // argument is in the alloca address space (so it is a little bit complicated
  // to solve).
  unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
    if (Call.isByValArgument(I)) {
      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
      if (PTy->getAddressSpace() != AllocaAS)
        return InlineResult::failure("byval arguments without alloca"
                                     " address space");
    }

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
    if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
      return InlineResult::failure("noinline call site attribute");

    auto IsViable = isInlineViable(*Callee);
    if (IsViable.isSuccess())
      return InlineResult::success();
    return InlineResult::failure(IsViable.getFailureReason());
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  Function *Caller = Call.getCaller();
  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
    return InlineResult::failure("conflicting attributes");

  // Don't inline this call if the caller has the optnone attribute.
  if (Caller->hasOptNone())
    return InlineResult::failure("optnone attribute");

  // Don't inline a function that treats null pointer as valid into a caller
  // that does not have this attribute.
  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
    return InlineResult::failure("nullptr definitions incompatible");

  // Don't inline functions which can be interposed at link-time.
  if (Callee->isInterposable())
    return InlineResult::failure("interposable");

  // Don't inline functions marked noinline.
  if (Callee->hasFnAttribute(Attribute::NoInline))
    return InlineResult::failure("noinline function attribute");

  // Don't inline call sites marked noinline.
  if (Call.isNoInline())
    return InlineResult::failure("noinline call site attribute");

  // Don't inline functions whose definition may be replaced by the loader,
  // since the compile-time body may not match what is executed.
  if (Callee->hasFnAttribute("loader-replaceable"))
    return InlineResult::failure("loader replaceable function");

  return std::nullopt;
}


InlineCost llvm::getInlineCost(
    CallBase &Call, Function *Callee, const InlineParams &Params,
    TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
    function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {

  auto UserDecision =
      llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);

  if (UserDecision) {
    if (UserDecision->isSuccess())
      return llvm::InlineCost::getAlways("always inline attribute");
    return llvm::InlineCost::getNever(UserDecision->getFailureReason());
  }

  if (InlineAllViableCalls && isInlineViable(*Callee).isSuccess())
    return llvm::InlineCost::getAlways(
        "Inlining forced by -inline-all-viable-calls");

  LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
                          << "... (caller:" << Call.getCaller()->getName()
                          << ")\n");

  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
                            GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
                            /*BoostIndirect=*/true, /*IgnoreThreshold=*/false,
                            GetEphValuesCache);
  InlineResult ShouldInline = CA.analyze();

  LLVM_DEBUG(CA.dump());

  // If the decision was made by the cost-benefit analysis, report it together
  // with the computed cost/benefit pair rather than a raw cost and threshold.
  if (CA.wasDecidedByCostBenefit()) {
    if (ShouldInline.isSuccess())
      return InlineCost::getAlways("benefit over cost",
                                   CA.getCostBenefitPair());
    else
      return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
  }

  if (CA.wasDecidedByCostThreshold())
    return InlineCost::get(CA.getCost(), CA.getThreshold(),
                           CA.getStaticBonusApplied());

  // No details on how the decision was made, simply return always or never.
  return ShouldInline.isSuccess()
             ? InlineCost::getAlways("empty function")
             : InlineCost::getNever(ShouldInline.getFailureReason());
}


InlineResult llvm::isInlineViable(Function &F) {
  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
  for (BasicBlock &BB : F) {
    // Disallow inlining of functions which contain indirect branches.
    if (isa<IndirectBrInst>(BB.getTerminator()))
      return InlineResult::failure("contains indirect branches");

    // Disallow inlining of blockaddresses which are used by non-callbr
    // instructions.
    if (BB.hasAddressTaken())
      for (User *U : BlockAddress::get(&BB)->users())
        if (!isa<CallBrInst>(*U))
          return InlineResult::failure("blockaddress used outside of callbr");

    for (auto &II : BB) {
      CallBase *Call = dyn_cast<CallBase>(&II);
      if (!Call)
        continue;

      // Disallow recursive calls.
      Function *Callee = Call->getCalledFunction();
      if (&F == Callee)
        return InlineResult::failure("recursive call");

      // Disallow calls which expose returns-twice to a function not previously
      // attributed as such.
      if (!ReturnsTwice && isa<CallInst>(Call) &&
          cast<CallInst>(Call)->canReturnTwice())
        return InlineResult::failure("exposes returns-twice attribute");

      if (Callee)
        switch (Callee->getIntrinsicID()) {
        default:
          break;
        case llvm::Intrinsic::icall_branch_funnel:
          // Disallow inlining of @llvm.icall.branch.funnel because current
          // backend can't separate call targets from call arguments.
          return InlineResult::failure(
              "disallowed inlining of @llvm.icall.branch.funnel");
        case llvm::Intrinsic::localescape:
          // Disallow inlining functions that call @llvm.localescape. Doing
          // this correctly would require major changes to the inliner.
          return InlineResult::failure(
              "disallowed inlining of @llvm.localescape");
        case llvm::Intrinsic::vastart:
          // Disallow inlining of functions which contain VarArgs initialized
          // with va_start.
          return InlineResult::failure(
              "contains VarArgs initialized with va_start");
        }
    }
  }

  return InlineResult::success();
}


// APIs to create InlineParams based on command line flags and/or other
// parameters.

InlineParams llvm::getInlineParams(int Threshold) {
  InlineParams Params;

  // This field is the threshold to use for a callee by default. This is
  // derived from one or more of:
  //  * optimization or size-optimization levels,
  //  * a value passed to createFunctionInliningPass function, or
  //  * the -inline-threshold flag.
  //  If the -inline-threshold flag is explicitly specified, that is used
  //  irrespective of anything else.
  if (InlineThreshold.getNumOccurrences() > 0)
    Params.DefaultThreshold = InlineThreshold;
  else
    Params.DefaultThreshold = Threshold;

  // Set the HintThreshold knob from the -inlinehint-threshold.
  Params.HintThreshold = HintThreshold;

  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
  Params.HotCallSiteThreshold = HotCallSiteThreshold;

  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
  // populate LocallyHotCallSiteThreshold. Later, we populate
  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold
  // if we know that optimization level is O3 (in the getInlineParams variant
  // that takes the opt and size levels).
  // FIXME: Remove this check (and make the assignment unconditional) after
  // addressing size regression issues at O2.
  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
    Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;

  // Set the ColdCallSiteThreshold knob from the
  // -inline-cold-callsite-threshold.
  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;

  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
  // -inlinehint-threshold commandline option is not explicitly given. If that
  // option is present, then its value applies even for callees with size and
  // minsize attributes.
  // If the -inline-threshold is not specified, set the ColdThreshold from the
  // -inlinecold-threshold even if it is not explicitly passed. If
  // -inline-threshold is specified, then -inlinecold-threshold needs to be
  // explicitly specified to set the ColdThreshold knob
  if (InlineThreshold.getNumOccurrences() == 0) {
    Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
    Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
    Params.ColdThreshold = ColdThreshold;
  } else if (ColdThreshold.getNumOccurrences() > 0) {
    Params.ColdThreshold = ColdThreshold;
  }
  return Params;
}


InlineParams llvm::getInlineParams() {
  return getInlineParams(DefaultThreshold);
}

// Compute the default threshold for inlining based on the opt level and the
// size opt level.
static int computeThresholdFromOptLevels(unsigned OptLevel,
                                         unsigned SizeOptLevel) {
  if (OptLevel > 2)
    return InlineConstants::OptAggressiveThreshold;
  if (SizeOptLevel == 1) // -Os
    return InlineConstants::OptSizeThreshold;
  if (SizeOptLevel == 2) // -Oz
    return InlineConstants::OptMinSizeThreshold;
  return DefaultThreshold;
}

InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
  auto Params =
      getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
  // At O3, use the value of -locally-hot-callsite-threshold option to populate
  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
  // when it is specified explicitly.
  if (OptLevel > 2)
    Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
  return Params;
}
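
// Illustrative sketch (not part of this file): how a pass pipeline would
// obtain tuned parameters from the overloads above. The opt/size levels pick
// the default threshold (O3 -> OptAggressiveThreshold, -Os -> OptSizeThreshold,
// -Oz -> OptMinSizeThreshold), which the -inline-* flags can still override.
//
//   InlineParams ParamsO3 = llvm::getInlineParams(/*OptLevel=*/3,
//                                                 /*SizeOptLevel=*/0);
//   InlineParams ParamsOz = llvm::getInlineParams(/*OptLevel=*/2,
//                                                 /*SizeOptLevel=*/2);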


PreservedAnalyses
InlineCostAnnotationPrinterPass::run(Function &F,
                                     FunctionAnalysisManager &FAM) {
  PrintInstructionComments = true;
  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  Module *M = F.getParent();
  ProfileSummaryInfo PSI(*M);
  TargetTransformInfo TTI(M->getDataLayout());
  // FIXME: Redesign the usage of InlineParams to expand the scope of this
  // pass. In the current implementation, the type of InlineParams doesn't
  // matter as the pass serves only for verification of inliner's decisions.
  // We can add a flag which determines InlineParams for this run. Right now,
  // the default InlineParams are used.
  const InlineParams Params = llvm::getInlineParams();
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *CalledFunction = CB->getCalledFunction();
        if (!CalledFunction || CalledFunction->isDeclaration())
          continue;
        OptimizationRemarkEmitter ORE(CalledFunction);
        InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI,
                                    GetAssumptionCache, nullptr, nullptr, PSI,
                                    &ORE);
        ICCA.analyze();
        OS << "      Analyzing call of " << CalledFunction->getName()
           << "... (caller:" << CB->getCaller()->getName() << ")\n";
        ICCA.print(OS);
        OS << "\n";
      }
    }
  }
  return PreservedAnalyses::all();
}
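
// Usage note (the registration name is an assumption, not stated in this
// file): the annotation printer is normally reached through the new pass
// manager, e.g.
//
//   opt -passes='print<inline-cost>' -disable-output input.ll
//
// which prints each analyzed call site followed by the per-instruction cost
// and threshold annotations emitted via the InlineCostAnnotationWriter.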
