LLVM: lib/Transforms/IPO/FunctionSpecialization.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

23#include

24

25using namespace llvm;

26

27#define DEBUG_TYPE "function-specialization"

28

29STATISTIC(NumSpecsCreated, "Number of specializations created");

30

31namespace llvm {

32

36 "Force function specialization for every call site with a constant "

37 "argument"));

38

41 "The maximum number of clones allowed for a single function "

42 "specialization"));

43

47 cl::desc("The maximum number of iterations allowed "

48 "when searching for transitive "

49 "phis"));

50

53 cl::desc("The maximum number of incoming values a PHI node can have to be "

54 "considered during the specialization bonus estimation"));

55

58 "The maximum number of predecessors a basic block can have to be "

59 "considered during the estimation of dead code"));

60

63 cl::desc("Don't specialize functions that have less than this number of "

64 "instructions"));

65

68 "Maximum codesize growth allowed per function"));

69

72 cl::desc("Reject specializations whose codesize savings are less than this "

73 "much percent of the original function size"));

74

77 cl::desc("Reject specializations whose latency savings are less than this "

78 "much percent of the original function size"));

79

82 cl::desc("Reject specializations whose inlining bonus is less than this "

83 "much percent of the original function size"));

84

87 "Enable function specialization on the address of global values"));

88

92 "Enable specialization of functions that take a literal constant as an "

93 "argument"));

94

96

97}

98

99bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,

101 unsigned I = 0;

105 });

106}

107

108

109

110

111

112

113

114Cost InstCostVisitor::estimateBasicBlocks(

117

118 while (!WorkList.empty()) {

120

121

122

123

124 assert(Solver.isBlockExecutable(BB) && "BB already found dead by IPSCCP!");

125 if (!DeadBlocks.insert(BB).second)

126 continue;

127

128 for (Instruction &I : *BB) {

129

130 if (KnownConstants.contains(&I))

131 continue;

132

134

136 << " for user " << I << "\n");

138 }

139

140

141

142 for (BasicBlock *SuccBB : successors(BB))

143 if (isBlockExecutable(SuccBB) && canEliminateSuccessor(BB, SuccBB))

145 }

147}

148

149Constant *InstCostVisitor::findConstantFor(Value *V) const {

151 return C;

152 if (auto *C = Solver.getConstantOrNull(V))

153 return C;

154 return KnownConstants.lookup(V);

155}

156

159 while (!PendingPHIs.empty()) {

160 Instruction *Phi = PendingPHIs.pop_back_val();

161

163 CodeSize += getCodeSizeSavingsForUser(Phi);

164 }

166}

167

168

170 LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "

171 << C->getNameOrAsOperand() << "\n");

173 for (auto *U : A->users())

176 CodeSize += getCodeSizeSavingsForUser(UI, A, C);

177

178 LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "

179 << CodeSize << "} for argument " << *A << "\n");

181}

182

183

184

185

186

187

188

189

190

191

192

193

194

196 auto &BFI = GetBFI(*F);

197 Cost TotalLatency = 0;

198

199 for (auto Pair : KnownConstants) {

201 if (I)

202 continue;

203

204 uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() /

205 BFI.getEntryFreq().getFrequency();

206

209

211 << "} for instruction " << *I << "\n");

212

214 }

215

216 return TotalLatency;

217}

218

221

223 return 0;

224

225

226 LastVisited = Use ? KnownConstants.insert({Use, C}).first

227 : KnownConstants.end();

228

231 CodeSize = estimateSwitchInst(*I);

233 CodeSize = estimateBranchInst(*I);

234 } else {

236 if (C)

237 return 0;

238 }

239

240

241

242

243 KnownConstants.insert({User, C});

244

246

248 << "} for user " << *User << "\n");

249

250 for (auto *U : User->users())

253 CodeSize += getCodeSizeSavingsForUser(UI, User, C);

254

256}

257

259 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

260

261 if (I.getCondition() != LastVisited->first)

262 return 0;

263

265 if (C)

266 return 0;

267

268 BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor();

269

270

271

273 for (const auto &Case : I.cases()) {

274 BasicBlock *BB = Case.getCaseSuccessor();

276 canEliminateSuccessor(I.getParent(), BB))

278 }

279

280 return estimateBasicBlocks(WorkList);

281}

282

284 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

285

286 if (I.getCondition() != LastVisited->first)

287 return 0;

288

289 BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue());

290

291

293 if (isBlockExecutable(Succ) && canEliminateSuccessor(I.getParent(), Succ))

295

296 return estimateBasicBlocks(WorkList);

297}

298

299bool InstCostVisitor::discoverTransitivelyIncomingValues(

301

304 unsigned Iter = 0;

305

306 while (!WorkList.empty()) {

308

311 return false;

312

313 if (!TransitivePHIs.insert(PN).second)

314 continue;

315

318

319

322 continue;

323

324 if (Constant *C = findConstantFor(V)) {

325

326 if (C != Const)

327 return false;

328 continue;

329 }

330

333 continue;

334 }

335

336

337 return false;

338 }

339 }

340 return true;

341}

342

345 return nullptr;

346

347 bool Inserted = VisitedPHIs.insert(&I).second;

349 bool HaveSeenIncomingPHI = false;

350

351 for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {

352 Value *V = I.getIncomingValue(Idx);

353

354

357 continue;

358

359 if (Constant *C = findConstantFor(V)) {

360 if (!Const)

362

363 if (C != Const)

364 return nullptr;

365 continue;

366 }

367

368 if (Inserted) {

369

370

371 PendingPHIs.push_back(&I);

372 return nullptr;

373 }

374

376

377 HaveSeenIncomingPHI = true;

378 continue;

379 }

380

381

382 return nullptr;

383 }

384

385 if (!Const)

386 return nullptr;

387

388 if (!HaveSeenIncomingPHI)

390

391 DenseSet<PHINode *> TransitivePHIs;

392 if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs))

393 return nullptr;

394

396}

397

399 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

400

402 return LastVisited->second;

403 return nullptr;

404}

405

407 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

408

409 Function *F = I.getCalledFunction();

411 return nullptr;

412

414 Operands.reserve(I.getNumOperands());

415

416 for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) {

417 Value *V = I.getOperand(Idx);

419 return nullptr;

420 Constant *C = findConstantFor(V);

421 if (C)

422 return nullptr;

424 }

425

428}

429

431 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

432

434 return nullptr;

436}

437

440 Operands.reserve(I.getNumOperands());

441

442 for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {

443 Value *V = I.getOperand(Idx);

444 Constant *C = findConstantFor(V);

445 if (C)

446 return nullptr;

448 }

449

452}

453

455 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

456

457 if (I.getCondition() == LastVisited->first) {

458 Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()

459 : I.getTrueValue();

460 return findConstantFor(V);

461 }

462 if (Constant *Condition = findConstantFor(I.getCondition()))

463 if ((I.getTrueValue() == LastVisited->first && Condition->isOneValue()) ||

464 (I.getFalseValue() == LastVisited->first && Condition->isZeroValue()))

465 return LastVisited->second;

466 return nullptr;

467}

468

471 I.getType(), DL);

472}

473

475 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

476

478 bool ConstOnRHS = I.getOperand(1) == LastVisited->first;

479 Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);

481

483 if (ConstOnRHS)

486 }

487

488

489

491 const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V);

492 auto &V1State = ConstOnRHS ? OtherLV : ConstLV;

493 auto &V2State = ConstOnRHS ? ConstLV : OtherLV;

494 return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);

495}

496

498 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

499

501}

502

504 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

505

506 bool ConstOnRHS = I.getOperand(1) == LastVisited->first;

507 Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);

510 Value *ConstVal = LastVisited->second;

511

512 if (ConstOnRHS)

514

516 simplifyBinOp(I.getOpcode(), ConstVal, OtherVal, SimplifyQuery(DL)));

517}

518

519Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,

521 Value *StoreValue = nullptr;

522 for (auto *User : Alloca->users()) {

523

524

525 if (User == Call)

526 continue;

527

529

530 if (StoreValue || Store->isVolatile())

531 return nullptr;

532 StoreValue = Store->getValueOperand();

533 continue;

534 }

535

536 return nullptr;

537 }

538

539 if (!StoreValue)

540 return nullptr;

541

542 return getCandidateConstant(StoreValue);

543}

544

545

546

547

550 if (!Val)

551 return nullptr;

554 return ConstVal;

557 return nullptr;

558 return getPromotableAlloca(Alloca, Call);

559}

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584void FunctionSpecializer::promoteConstantStackValues(Function *F) {

585 for (User *U : F->users()) {

586

589 continue;

590

591 if (!Solver.isBlockExecutable(Call->getParent()))

592 continue;

593

594 for (const Use &U : Call->args()) {

598

600 continue;

601

602 auto *ConstVal = getConstantStackValue(Call, ArgOp);

603 if (!ConstVal)

604 continue;

605

606 Value *GV = new GlobalVariable(M, ConstVal->getType(), true,

608 "specialized.arg." + Twine(++NGlobals));

610 }

611 }

612}

613

614

615

620 if (!BC || BC->getType() != BC->getOperand(0)->getType())

621 continue;

622 Inst.replaceAllUsesWith(BC->getOperand(0));

623 Inst.eraseFromParent();

624 }

625 }

626}

627

628

629void FunctionSpecializer::cleanUpSSA() {

630 for (Function *F : Specializations)

632}

633

634

637

639

641 return static_cast<unsigned>(hash_value(S));

642 }

643

645 return LHS == RHS;

646 }

647};

648

651 if (NumSpecsCreated > 0)

652 dbgs() << "FnSpecialization: Created " << NumSpecsCreated

653 << " specializations in module " << M.getName() << "\n");

654

655 removeDeadFunctions();

656 cleanUpSSA();

657}

658

659

660

661

663 int64_t Value = C.getValue();

664

665 assert(Value >= 0 && "CodeSize and Latency cannot be negative");

666

667

668 return static_cast<unsigned>(Value);

669}

670

671

672

673

674

676

679 unsigned NumCandidates = 0;

681 if (!isCandidateFunction(&F))

682 continue;

683

684 auto [It, Inserted] = FunctionMetrics.try_emplace(&F);

686

687 if (Inserted) {

691 Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);

692 }

693

694

695

696 const bool RequireMinSize =

699

700

701

702

703 if (Metrics.notDuplicatable || Metrics.NumInsts.isValid() ||

705 continue;

706

707

708

709

710

712 continue;

713

714 int64_t Sz = Metrics.NumInsts.getValue();

715 assert(Sz > 0 && "CodeSize should be positive");

716

717 unsigned FuncSize = static_cast<unsigned>(Sz);

718

719 LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "

720 << F.getName() << " is " << FuncSize << "\n");

721

722 if (Inserted && Metrics.isRecursive)

723 promoteConstantStackValues(&F);

724

725 if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {

727 dbgs() << "FnSpecialization: No possible specializations found for "

728 << F.getName() << "\n");

729 continue;

730 }

731

732 ++NumCandidates;

733 }

734

735 if (!NumCandidates) {

738 << "FnSpecialization: No possible specializations found in module\n");

739 return false;

740 }

741

742

743

744

745 auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {

746 if (AllSpecs[I].Score != AllSpecs[J].Score)

747 return AllSpecs[I].Score > AllSpecs[J].Score;

748 return I > J;

749 };

750 const unsigned NSpecs =

751 std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));

753 std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);

754 if (AllSpecs.size() > NSpecs) {

755 LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "

756 << "the maximum number of clones threshold.\n"

757 << "FnSpecialization: Specializing the "

758 << NSpecs

759 << " most profitable candidates.\n");

760 std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore);

761 for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {

762 BestSpecs[NSpecs] = I;

763 std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);

764 std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);

765 }

766 }

767

768 LLVM_DEBUG(dbgs() << "FnSpecialization: List of specializations \n";

769 for (unsigned I = 0; I < NSpecs; ++I) {

770 const Spec &S = AllSpecs[BestSpecs[I]];

771 dbgs() << "FnSpecialization: Function " << S.F->getName()

772 << " , score " << S.Score << "\n";

774 dbgs() << "FnSpecialization: FormalArg = "

777 << "\n";

778 });

779

780

783 for (unsigned I = 0; I < NSpecs; ++I) {

784 Spec &S = AllSpecs[BestSpecs[I]];

785

786

787

788 FunctionGrowth[S.F] += S.CodeSize;

789

790 S.Clone = createSpecialization(S.F, S.Sig);

791

792

796 << " to call " << Clone->getName() << "\n");

797 Call->setCalledFunction(S.Clone);

798 auto &BFI = GetBFI(*Call->getFunction());

799 std::optional<uint64_t> Count =

800 BFI.getBlockProfileCount(Call->getParent());

802 std::optionalllvm::Function::ProfileCount MaybeCloneCount =

804 if (MaybeCloneCount) {

805 uint64_t CallCount = *Count + MaybeCloneCount->getCount();

807 if (std::optionalllvm::Function::ProfileCount MaybeOriginalCount =

809 uint64_t OriginalCount = MaybeOriginalCount->getCount();

810 if (OriginalCount >= *Count) {

812 } else {

813

814

816 }

817 }

818 }

819 }

820 }

821

823 OriginalFuncs.insert(S.F);

824 }

825

826 Solver.solveWhileResolvedUndefsIn(Clones);

827

828

829

830

831 for (Function *F : OriginalFuncs) {

832 auto [Begin, End] = SM[F];

833 updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);

834 }

835

837 if (F->getReturnType()->isVoidTy())

838 continue;

839 if (F->getReturnType()->isStructTy()) {

841 if (!Solver.isStructLatticeConstant(F, STy))

842 continue;

843 } else {

844 auto It = Solver.getTrackedRetVals().find(F);

845 assert(It != Solver.getTrackedRetVals().end() &&

846 "Return value ought to be tracked");

848 continue;

849 }

850 for (User *U : F->users()) {

852

853 if (CS->getCalledFunction() != F)

854 continue;

855 Solver.resetLatticeValueFor(CS);

856 }

857 }

858 }

859

860

861 Solver.solveWhileResolvedUndefs();

862

863 for (Function *F : OriginalFuncs)

864 if (FunctionMetrics[F].isRecursive)

865 promoteConstantStackValues(F);

866

867 return true;

868}

869

870void FunctionSpecializer::removeDeadFunctions() {

871 for (Function *F : DeadFunctions) {

872 LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "

873 << F->getName() << "\n");

875 FAM->clear(*F, F->getName());

876

877

878

881 "User of dead function must be call or invoke");

885 }

886 F->eraseFromParent();

887 }

888 DeadFunctions.clear();

889}

890

891

892

896 Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));

898 return Clone;

899}

900

901bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,

904

905

906

907 DenseMap<SpecSig, unsigned> UniqueSpecs;

908

909

911 for (Argument &Arg : F->args())

912 if (isArgumentInteresting(&Arg))

913 Args.push_back(&Arg);

914

915 if (Args.empty())

916 return false;

917

918 for (User *U : F->users()) {

920 continue;

922

923

924 if (CS.getCalledFunction() != F)

925 continue;

926

927

928

929 if (CS.hasFnAttr(Attribute::MinSize))

930 continue;

931

932

933

934 if (!Solver.isBlockExecutable(CS.getParent()))

935 continue;

936

937

938

939 SpecSig S;

940 for (Argument *A : Args) {

941 Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));

942 if (C)

943 continue;

944 LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "

945 << A->getName() << " : " << C->getNameOrAsOperand()

946 << "\n");

947 S.Args.push_back({A, C});

948 }

949

950 if (S.Args.empty())

951 continue;

952

953

954 if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {

955

956

957

958

959

960

962 continue;

963 const unsigned Index = It->second;

965 } else {

966

968 unsigned Score = 0;

970 for (ArgInfo &A : S.Args) {

972 Score += getInliningBonus(A.Formal, A.Actual);

973 }

975

977 unsigned SpecSize = FuncSize - CodeSizeSavings;

978

979 auto IsProfitable = [&]() -> bool {

980

982 return true;

983

985 dbgs() << "FnSpecialization: Specialization bonus {Inlining = "

986 << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");

987

988

990 return true;

991

993 dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "

994 << CodeSizeSavings << " ("

995 << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");

996

997

999 return false;

1000

1001

1002 unsigned LatencySavings =

1004

1006 dbgs() << "FnSpecialization: Specialization bonus {Latency = "

1007 << LatencySavings << " ("

1008 << (LatencySavings * 100 / FuncSize) << "%)}\n");

1009

1010

1012 return false;

1013

1015 return false;

1016

1017 Score += std::max(CodeSizeSavings, LatencySavings);

1018 return true;

1019 };

1020

1021

1022 if (!IsProfitable())

1023 continue;

1024

1025

1026 auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);

1028 Spec.CallSites.push_back(&CS);

1029 const unsigned Index = AllSpecs.size() - 1;

1030 UniqueSpecs[S] = Index;

1031 if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)

1032 It->second.second = Index + 1;

1033 }

1034 }

1035

1036 return !UniqueSpecs.empty();

1037}

1038

1039bool FunctionSpecializer::isCandidateFunction(Function *F) {

1040 if (F->isDeclaration() || F->arg_empty())

1041 return false;

1042

1043 if (F->hasFnAttribute(Attribute::NoDuplicate))

1044 return false;

1045

1046

1047 if (Specializations.contains(F))

1048 return false;

1049

1050

1052 return false;

1053

1054

1055

1056 if (!Solver.isBlockExecutable(&F->getEntryBlock()))

1057 return false;

1058

1059

1060 if (F->hasFnAttribute(Attribute::AlwaysInline))

1061 return false;

1062

1063 LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()

1064 << "\n");

1065 return true;

1066}

1067

1071

1072

1073

1075

1078

1079

1080

1081

1082 Solver.setLatticeValueForSpecializationArguments(Clone, S.Args);

1083 Solver.markBlockExecutable(&Clone->front());

1084 Solver.addArgumentTrackedFunction(Clone);

1085 Solver.addTrackedFunction(Clone);

1086

1087

1088 Specializations.insert(Clone);

1089 ++NumSpecsCreated;

1090

1091 return Clone;

1092}

1093

1094

1095

1096

1097

1098unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {

1100 if (!CalledFunction)

1101 return 0;

1102

1103

1104 auto &CalleeTTI = (GetTTI)(*CalledFunction);

1105

1106

1107

1108

1109

1110

1111 int InliningBonus = 0;

1112 for (User *U : A->users()) {

1114 continue;

1116 if (CS->getCalledOperand() != A)

1117 continue;

1118 if (CS->getFunctionType() != CalledFunction->getFunctionType())

1119 continue;

1120

1121

1122

1123

1124

1125

1126

1127

1128

1129

1132 InlineCost IC =

1133 getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);

1134

1135

1136

1138 InliningBonus += Params.DefaultThreshold;

1141

1142 LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus

1143 << " for user " << *U << "\n");

1144 }

1145

1146 return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0;

1147}

1148

1149

1150

1151bool FunctionSpecializer::isArgumentInteresting(Argument *A) {

1152

1153 if (A->user_empty())

1154 return false;

1155

1156 Type *Ty = A->getType();

1159 return false;

1160

1161

1162

1163 if (A->hasByValAttr() && A->getParent()->onlyReadsMemory())

1164 return false;

1165

1166

1167 if (!Solver.isArgumentTrackedFunction(A->getParent()))

1168 return true;

1169

1170

1171

1172

1173 bool IsOverdefined = Ty->isStructTy()

1175 : SCCPSolver::isOverdefined(Solver.getLatticeValueFor(A));

1176

1178 if (IsOverdefined)

1179 dbgs() << "FnSpecialization: Found interesting parameter "

1180 << A->getNameOrAsOperand() << "\n";

1181 else

1182 dbgs() << "FnSpecialization: Nothing to do, parameter "

1183 << A->getNameOrAsOperand() << " is already constant\n";

1184 );

1185 return IsOverdefined;

1186}

1187

1188

1189

1190Constant *FunctionSpecializer::getCandidateConstant(Value *V) {

1192 return nullptr;

1193

1194

1195

1197 if (C)

1198 C = Solver.getConstantOrNull(V);

1199

1200

1201

1202 if (C && C->getType()->isPointerTy() && C->isNullValue())

1205 return nullptr;

1206

1207 return C;

1208}

1209

1210void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,

1211 const Spec *End) {

1212

1214 for (User *U : F->users())

1216 CS && CS->getCalledFunction() == F &&

1217 Solver.isBlockExecutable(CS->getParent()))

1219

1220 unsigned NCallsLeft = ToUpdate.size();

1221 for (CallBase *CS : ToUpdate) {

1222 bool ShouldDecrementCount = CS->getFunction() == F;

1223

1224

1225 const Spec *BestSpec = nullptr;

1226 for (const Spec &S : make_range(Begin, End)) {

1227 if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))

1228 continue;

1229

1230 if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {

1231 unsigned ArgNo = Arg.Formal->getArgNo();

1232 return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;

1233 }))

1234 continue;

1235

1236 BestSpec = &S;

1237 }

1238

1239 if (BestSpec) {

1240 LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS

1241 << " to call " << BestSpec->Clone->getName() << "\n");

1242 CS->setCalledFunction(BestSpec->Clone);

1243 ShouldDecrementCount = true;

1244 }

1245

1246 if (ShouldDecrementCount)

1247 --NCallsLeft;

1248 }

1249

1250

1251

1252

1253

1254 if (NCallsLeft == 0 && Solver.isArgumentTrackedFunction(F) &&

1255 F->hasAddressTaken()) {

1256 Solver.markFunctionUnreachable(F);

1257 DeadFunctions.insert(F);

1258 }

1259}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static Function * cloneCandidateFunction(Function *F, unsigned NSpecs)

Clone the function F and remove the ssa_copy intrinsics added by the SCCPSolver in the cloned version...

Definition FunctionSpecialization.cpp:893

static void removeSSACopy(Function &F)

Definition FunctionSpecialization.cpp:616

static unsigned getCostValue(const Cost &C)

Get the unsigned Value of given Cost object.

Definition FunctionSpecialization.cpp:662

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

FunctionAnalysisManager FAM

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This pass exposes codegen information to IR-level passes.

an instruction to allocate memory on the stack

Type * getAllocatedType() const

Return the type that is being allocated by the instruction.

This class represents an incoming formal argument to a Function.

LLVM Basic Block Representation.

Conditional or Unconditional Branch instruction.

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

bool onlyReadsMemory(unsigned OpNo) const

Value * getArgOperand(unsigned i) const

void setArgOperand(unsigned i, Value *v)

iterator_range< User::op_iterator > args()

Iteration adapter for range-for loops.

unsigned getArgOperandNo(const Use *U) const

Given a use for a arg operand, get the arg operand number that corresponds to it.

This class represents a function call, abstracting a target machine's calling convention.

This is the base class for all instructions that perform data casts.

This class is the base class for the comparison instructions.

This is an important base class in LLVM.

iterator find(const_arg_type_t< KeyT > Val)

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Implements a dense probed hash-table based set.

This class represents a freeze function that returns random concrete value if an operand is either a ...

LLVM_ABI ~FunctionSpecializer()

Definition FunctionSpecialization.cpp:649

LLVM_ABI bool run()

Attempt to specialize functions in the module to enable constant propagation across function boundari...

Definition FunctionSpecialization.cpp:675

InstCostVisitor getInstCostVisitorFor(Function *F)

FunctionType * getFunctionType() const

Returns the FunctionType for me.

const BasicBlock & front() const

std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const

Get the entry count for this function.

void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)

Set the entry count for this function.

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

void setLinkage(LinkageTypes LT)

@ InternalLinkage

Rename collisions when linking (static functions).

int getCostDelta() const

Get the cost delta from the threshold for inlining.

LLVM_ABI Cost getLatencySavingsForKnownConstants()

Compute the latency savings from replacing all arguments with constants for a specialization candidat...

Definition FunctionSpecialization.cpp:195

LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C)

Compute the codesize savings for replacing argument A with constant C.

Definition FunctionSpecialization.cpp:169

LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs()

Definition FunctionSpecialization.cpp:157

bool isBlockExecutable(BasicBlock *BB) const

void visit(Iterator Start, Iterator End)

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

LLVM_ABI const Function * getFunction() const

Return the function this instruction belongs to.

An instruction for reading from memory.

BasicBlock * getIncomingBlock(unsigned i) const

Return incoming basic block number i.

Value * getIncomingValue(unsigned i) const

Return incoming value number x.

unsigned getNumIncomingValues() const

Return the number of incoming edges.

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

static LLVM_ABI bool isOverdefined(const ValueLatticeElement &LV)

This class represents the LLVM 'select' instruction.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void reserve(size_type N)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

@ TCK_CodeSize

Instruction code size.

@ TCK_Latency

The latency of instruction.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

bool isPointerTy() const

True if this is an instance of PointerType.

bool isStructTy() const

True if this is an instance of StructType.

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

bool isIntegerTy() const

True if this is an instance of IntegerType.

A Use represents the edge between a Value definition and its users.

LLVM_ABI Constant * getCompare(CmpInst::Predicate Pred, Type *Ty, const ValueLatticeElement &Other, const DataLayout &DL) const

true, false or undef constants, or nullptr if the comparison cannot be evaluated.

static ValueLatticeElement get(Constant *C)

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void setName(const Twine &Name)

Change the name of the value.

LLVM_ABI std::string getNameOrAsOperand() const

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< user_iterator > users()

LLVM_ABI const Value * stripPointerCasts() const

Strip off pointer casts, all-zero GEPs and address space casts.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

std::pair< iterator, bool > insert(const ValueT &V)

const ParentTy * getParent() const

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

const int IndirectCallThreshold

initializer< Ty > init(const Ty &Val)

@ User

could "use" a pointer

This is an optimization pass for GlobalISel generic memory operations.

static cl::opt< unsigned > MinCodeSizeSavings("funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc("Reject specializations whose codesize savings are less than this " "much percent of the original function size"))

FunctionAddr VTableAddr Value

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

hash_code hash_value(const FixedPointSemantics &Val)

static cl::opt< bool > SpecializeOnAddress("funcspec-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values"))

LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)

canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

auto successors(const MachineBasicBlock *BB)

static cl::opt< unsigned > MaxIncomingPhiValues("funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden, cl::desc("The maximum number of incoming values a PHI node can have to be " "considered during the specialization bonus estimation"))

static cl::opt< bool > SpecializeLiteralConstant("funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant as an " "argument"))

DenseMap< Function *, std::pair< unsigned, unsigned > > SpecMap

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)

Returns true if machine function MF is suggested to be size-optimized based on the profile.

LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)

Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.

static cl::opt< unsigned > MaxCodeSizeGrowth("funcspec-max-codesize-growth", cl::init(3), cl::Hidden, cl::desc("Maximum codesize growth allowed per function"))

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

static cl::opt< unsigned > MinLatencySavings("funcspec-min-latency-savings", cl::init(20), cl::Hidden, cl::desc("Reject specializations whose latency savings are less than this " "much percent of the original function size"))

LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)

ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...

static cl::opt< unsigned > MinFunctionSize("funcspec-min-function-size", cl::init(500), cl::Hidden, cl::desc("Don't specialize functions that have less than this number of " "instructions"))

static cl::opt< unsigned > MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100), cl::Hidden, cl::desc("The maximum number of iterations allowed " "when searching for transitive " "phis"))

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)

Attempt to constant fold a unary operation with the specified operand.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

FunctionAddr VTableAddr Count

LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)

Attempt to constant fold a cast with the specified operand.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)

Get an InlineCost object representing the cost of inlining this callsite.

static cl::opt< unsigned > MaxClones("funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc("The maximum number of clones allowed for a single function " "specialization"))

LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)

Given operands for a BinaryOperator, fold the result or return null.

static cl::opt< bool > ForceSpecialization("force-specialization", cl::init(false), cl::Hidden, cl::desc("Force function specialization for every call site with a constant " "argument"))

static cl::opt< unsigned > MaxBlockPredecessors("funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc("The maximum number of predecessors a basic block can have to be " "considered during the estimation of dead code"))

LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Return true if this function can prove that V does not have undef bits and is never poison.

ArrayRef(const T &OneElt) -> ArrayRef< T >

LLVM_ABI InlineParams getInlineParams()

Generate the parameters to tune the inline cost analysis based only on the commandline options.

ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto predecessors(const MachineBasicBlock *BB)

cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))

LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)

Return the value that a load from C with offset Offset would produce if it is constant and determinab...

LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)

ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.

LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)

This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....

LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)

Return a copy of the specified function and add it to that function's module.

static cl::opt< unsigned > MinInliningBonus("funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc("Reject specializations whose inlining bonus is less than this " "much percent of the original function size"))

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

Helper struct shared between Function Specialization and SCCP Solver.

Utility to calculate the size and a few similar metrics for a set of basic blocks.

static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)

Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).

static unsigned getHashValue(const SpecSig &S)

Definition FunctionSpecialization.cpp:640

static bool isEqual(const SpecSig &LHS, const SpecSig &RHS)

Definition FunctionSpecialization.cpp:644

static SpecSig getEmptyKey()

Definition FunctionSpecialization.cpp:636

static SpecSig getTombstoneKey()

Definition FunctionSpecialization.cpp:638

An information struct used to provide DenseMap with the various necessary components for a given valu...

SmallVector< ArgInfo, 4 > Args

SmallVector< CallBase * > CallSites