LLVM: lib/Transforms/IPO/FunctionSpecialization.cpp Source File
//===- FunctionSpecialization.cpp - Function Specialization --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
23#include
24
25using namespace llvm;
26
27#define DEBUG_TYPE "function-specialization"
28
29STATISTIC(NumSpecsCreated, "Number of specializations created");
30
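// The command-line knobs below control when specialization is attempted and
// when a candidate specialization is considered profitable enough to create.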
namespace llvm {

static cl::opt<bool> ForceSpecialization(
    "force-specialization", cl::init(false), cl::Hidden,
    cl::desc(
        "Force function specialization for every call site with a constant "
        "argument"));

static cl::opt<unsigned> MaxClones(
    "funcspec-max-clones", cl::init(3), cl::Hidden,
    cl::desc("The maximum number of clones allowed for a single function "
             "specialization"));

static cl::opt<unsigned> MaxDiscoveryIterations(
    "funcspec-max-discovery-iterations", cl::init(100), cl::Hidden,
    cl::desc("The maximum number of iterations allowed "
             "when searching for transitive "
             "phis"));

static cl::opt<unsigned> MaxIncomingPhiValues(
    "funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
    cl::desc("The maximum number of incoming values a PHI node can have to be "
             "considered during the specialization bonus estimation"));

static cl::opt<unsigned> MaxBlockPredecessors(
    "funcspec-max-block-predecessors", cl::init(2), cl::Hidden,
    cl::desc(
        "The maximum number of predecessors a basic block can have to be "
        "considered during the estimation of dead code"));

static cl::opt<unsigned> MinFunctionSize(
    "funcspec-min-function-size", cl::init(500), cl::Hidden,
    cl::desc("Don't specialize functions that have less than this number of "
             "instructions"));

static cl::opt<unsigned> MaxCodeSizeGrowth(
    "funcspec-max-codesize-growth", cl::init(3), cl::Hidden,
    cl::desc("Maximum codesize growth allowed per function"));

static cl::opt<unsigned> MinCodeSizeSavings(
    "funcspec-min-codesize-savings", cl::init(20), cl::Hidden,
    cl::desc("Reject specializations whose codesize savings are less than this "
             "much percent of the original function size"));

static cl::opt<unsigned> MinLatencySavings(
    "funcspec-min-latency-savings", cl::init(20), cl::Hidden,
    cl::desc("Reject specializations whose latency savings are less than this "
             "much percent of the original function size"));

static cl::opt<unsigned> MinInliningBonus(
    "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden,
    cl::desc("Reject specializations whose inlining bonus is less than this "
             "much percent of the original function size"));

static cl::opt<bool> SpecializeOnAddress(
    "funcspec-on-address", cl::init(false), cl::Hidden,
    cl::desc("Enable function specialization on the address of global values"));

static cl::opt<bool> SpecializeLiteralConstant(
    "funcspec-for-literal-constant", cl::init(true), cl::Hidden,
    cl::desc(
        "Enable specialization of functions that take a literal constant as an "
        "argument"));

} // namespace llvm
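
// Rough usage sketch (an assumption, not part of this file): the knobs above
// are ordinary cl::opt flags, so with an `opt` build that runs the specializer
// they can be tuned on the command line, for example:
//   opt -passes=ipsccp -force-specialization -funcspec-max-clones=5 -S in.ll
// Whether IPSCCP invokes the specializer, and the exact pass-parameter
// spelling (e.g. `ipsccp<func-spec>`), depends on the LLVM version.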
98
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
                                            BasicBlock *Succ) const {
  unsigned I = 0;
  return all_of(predecessors(Succ), [&](BasicBlock *Pred) {
    return I++ < MaxBlockPredecessors &&
           (Pred == BB || Pred == Succ || DeadBlocks.contains(Pred));
  });
}

// Estimates the codesize savings due to dead code after constant propagation.
// \p WorkList: The basic blocks of a specialization which will eventually
// become dead once we replace instructions that are known to be constants.
// The successors of these blocks are added to the worklist, at most once per
// block, as long as they are executable and have no live predecessors left.
Cost InstCostVisitor::estimateBasicBlocks(
    SmallVectorImpl<BasicBlock *> &WorkList) {
  Cost CodeSize = 0;
  while (!WorkList.empty()) {
    BasicBlock *BB = WorkList.pop_back_val();

    // These blocks are considered dead as far as the InstCostVisitor
    // is concerned. They haven't been proven dead yet by the Solver,
    // but may become so after we propagate the specialization arguments.
    assert(Solver.isBlockExecutable(BB) && "BB already found dead by IPSCCP!");
    if (!DeadBlocks.insert(BB).second)
      continue;

    for (Instruction &I : *BB) {
      // If it's a known constant we have already accounted for it.
      if (KnownConstants.contains(&I))
        continue;

      Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);

      LLVM_DEBUG(dbgs() << "FnSpecialization:     CodeSize " << C
                        << " for user " << I << "\n");
      CodeSize += C;
    }

    // Keep adding dead successors to the list as long as they are
    // executable and only reachable from dead blocks.
    for (BasicBlock *SuccBB : successors(BB))
      if (isBlockExecutable(SuccBB) && canEliminateSuccessor(BB, SuccBB))
        WorkList.push_back(SuccBB);
  }
  return CodeSize;
}
148
Constant *InstCostVisitor::findConstantFor(Value *V) const {
  if (auto *C = dyn_cast<Constant>(V))
    return C;
152 if (auto *C = Solver.getConstantOrNull(V))
153 return C;
154 return KnownConstants.lookup(V);
155}
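
// Accumulate the codesize savings of PHI nodes whose evaluation was deferred
// until their transitive incoming values had been visited.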
Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
  Cost CodeSize;
  while (!PendingPHIs.empty()) {
    Instruction *Phi = PendingPHIs.pop_back_val();
    // The pending PHIs could have been proven dead by now.
    if (isBlockExecutable(Phi->getParent()))
      CodeSize += getCodeSizeSavingsForUser(Phi);
  }
  return CodeSize;
}
167

/// Compute the codesize savings for replacing argument \p A with constant
/// \p C.
Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) {
  LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                    << C->getNameOrAsOperand() << "\n");
  Cost CodeSize;
  for (auto *U : A->users())
    if (auto *UI = dyn_cast<Instruction>(U))
      if (isBlockExecutable(UI->getParent()))
        CodeSize += getCodeSizeSavingsForUser(UI, A, C);

  LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
                    << CodeSize << "} for argument " << *A << "\n");
  return CodeSize;
}
182
// Compute the latency savings from replacing all arguments with constants for
// a specialization candidate. In practice we only consider the lattice values
// of the instructions visited while estimating the codesize savings (the
// KnownConstants map), weighting each instruction's latency by the relative
// block frequency of its parent block.
Cost InstCostVisitor::getLatencySavingsForKnownConstants() {
  auto &BFI = GetBFI(*F);
  Cost TotalLatency = 0;

  for (auto Pair : KnownConstants) {
    Instruction *I = dyn_cast<Instruction>(Pair.first);
    if (!I)
      continue;

    uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() /
                      BFI.getEntryFreq().getFrequency();

    Cost Latency =
        Weight * TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);

    LLVM_DEBUG(dbgs() << "FnSpecialization:     {Latency = " << Latency
                      << "} for instruction " << *I << "\n");

    TotalLatency += Latency;
  }

  return TotalLatency;
}
218
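// Estimate the codesize savings of binding \p Use to constant \p C inside the
// user instruction \p User, then recursively propagate any folded constant to
// the users of \p User.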
Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
                                                Constant *C) {
  // We have already propagated a constant for this user.
  if (KnownConstants.contains(User))
    return 0;

  // Cache the iterator before visiting.
  LastVisited = Use ? KnownConstants.insert({Use, C}).first
                    : KnownConstants.end();

  Cost CodeSize = 0;
  if (auto *I = dyn_cast<SwitchInst>(User)) {
    CodeSize = estimateSwitchInst(*I);
  } else if (auto *I = dyn_cast<BranchInst>(User)) {
    CodeSize = estimateBranchInst(*I);
  } else {
    C = visit(*User);
    if (!C)
      return 0;
  }

  // Even though it doesn't make sense to bind switch and branch instructions
  // with a constant, unlike any other instruction type, it prevents estimating
  // their bonus multiple times.
  KnownConstants.insert({User, C});

  CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);

  LLVM_DEBUG(dbgs() << "FnSpecialization:     {CodeSize = " << CodeSize
                    << "} for user " << *User << "\n");

  for (auto *U : User->users())
    if (auto *UI = dyn_cast<Instruction>(U))
      if (UI != User && isBlockExecutable(UI->getParent()))
        CodeSize += getCodeSizeSavingsForUser(UI, User, C);

  return CodeSize;
}
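
// When the switch condition is the visited constant, every case successor
// other than the one actually taken becomes a candidate for dead-code
// estimation.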
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
259 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
260
261 if (I.getCondition() != LastVisited->first)
262 return 0;
263
  auto *C = dyn_cast<ConstantInt>(LastVisited->second);
  if (!C)
    return 0;
267
268 BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor();

  // Initialize the worklist with the dead case successors.
  SmallVector<BasicBlock *> WorkList;
273 for (const auto &Case : I.cases()) {
    BasicBlock *BB = Case.getCaseSuccessor();
    if (BB != Succ && isBlockExecutable(BB) &&
        canEliminateSuccessor(I.getParent(), BB))
      WorkList.push_back(BB);
  }
279
280 return estimateBasicBlocks(WorkList);
281}
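
// Same idea for a conditional branch: the successor on the side not taken by
// the known condition is a candidate for dead-code estimation.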
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
284 assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
285
286 if (I.getCondition() != LastVisited->first)
287 return 0;
288
289 BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue());

  // Initialize the worklist with the dead successor.
  SmallVector<BasicBlock *> WorkList;
  if (isBlockExecutable(Succ) && canEliminateSuccessor(I.getParent(), Succ))
    WorkList.push_back(Succ);

296 return estimateBasicBlocks(WorkList);
297}
298
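// Check that the PHI nodes feeding (transitively) into \p Root all take the
// same constant \p Const on every live incoming edge, giving up after
// MaxDiscoveryIterations or once a PHI has more than MaxIncomingPhiValues
// incoming values.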
bool InstCostVisitor::discoverTransitivelyIncomingValues(
    Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs) {

  SmallVector<PHINode *, 64> WorkList;
  WorkList.push_back(Root);
  unsigned Iter = 0;

  while (!WorkList.empty()) {
    PHINode *PN = WorkList.pop_back_val();

    if (++Iter > MaxDiscoveryIterations ||
        PN->getNumIncomingValues() > MaxIncomingPhiValues)
      return false;

    if (!TransitivePHIs.insert(PN).second)
      continue;

    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      Value *V = PN->getIncomingValue(I);

      // Disregard self-references and dead incoming values.
      if (auto *Inst = dyn_cast<Instruction>(V))
        if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
          continue;

      if (Constant *C = findConstantFor(V)) {
        // Not all incoming values are the same constant. Bail immediately.
        if (C != Const)
          return false;
        continue;
      }

      if (auto *Phi = dyn_cast<PHINode>(V)) {
        WorkList.push_back(Phi);
        continue;
      }

      // We can't reason about anything else.
      return false;
    }
  }
  return true;
}
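
// Try to deduce a constant for a PHI node: all live incoming values must be
// the same constant, possibly flowing through a cycle of transitive PHIs.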
Constant *InstCostVisitor::visitPHINode(PHINode &I) {
  if (I.getNumIncomingValues() > MaxIncomingPhiValues)
    return nullptr;
346
  bool Inserted = VisitedPHIs.insert(&I).second;
  Constant *Const = nullptr;
  bool HaveSeenIncomingPHI = false;
350
351 for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
352 Value *V = I.getIncomingValue(Idx);
353

    if (auto *Inst = dyn_cast<Instruction>(V))
      if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
        continue;

359 if (Constant *C = findConstantFor(V)) {
      if (!Const)
        Const = C;

363 if (C != Const)
364 return nullptr;
365 continue;
366 }
367
368 if (Inserted) {
369
370
371 PendingPHIs.push_back(&I);
372 return nullptr;
373 }

    if (isa<PHINode>(V)) {
      // Perhaps it is a transitive PHI.
377 HaveSeenIncomingPHI = true;
378 continue;
379 }
380
381
382 return nullptr;
383 }
384
385 if (!Const)
386 return nullptr;
387
  if (!HaveSeenIncomingPHI)
    return Const;

391 DenseSet<PHINode *> TransitivePHIs;
392 if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs))
393 return nullptr;

  return Const;
}
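
// A frozen value that is known not to be undef or poison folds to the value
// itself.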
Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second))
    return LastVisited->second;
403 return nullptr;
404}
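
// Attempt to constant fold a call whose arguments (excluding the callee
// operand) are all known constants.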
Constant *InstCostVisitor::visitCallBase(CallBase &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
408
  Function *F = I.getCalledFunction();
  if (!F || !canConstantFoldCallTo(&I, F))
    return nullptr;

  SmallVector<Constant *, 8> Operands;
  Operands.reserve(I.getNumOperands());
415
416 for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) {
    Value *V = I.getOperand(Idx);
    if (isa<MetadataAsValue>(V))
      return nullptr;
    Constant *C = findConstantFor(V);
    if (!C)
      return nullptr;
    Operands.push_back(C);
  }

  auto Ops = ArrayRef(Operands.begin(), Operands.end());
  return ConstantFoldCall(&I, F, Ops);
428}

Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (isa<ConstantPointerNull>(LastVisited->second))
    return nullptr;
  return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
}
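
// Fold a GEP whose operands are all known constants.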
Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
  SmallVector<Constant *, 8> Operands;
  Operands.reserve(I.getNumOperands());
441
442 for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
443 Value *V = I.getOperand(Idx);
444 Constant *C = findConstantFor(V);
    if (!C)
      return nullptr;
    Operands.push_back(C);
  }

  auto Ops = ArrayRef(Operands.begin(), Operands.end());
  return ConstantFoldInstOperands(&I, Ops, DL);
452}

Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
456
457 if (I.getCondition() == LastVisited->first) {
458 Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
459 : I.getTrueValue();
460 return findConstantFor(V);
461 }
462 if (Constant *Condition = findConstantFor(I.getCondition()))
463 if ((I.getTrueValue() == LastVisited->first && Condition->isOneValue()) ||
464 (I.getFalseValue() == LastVisited->first && Condition->isZeroValue()))
465 return LastVisited->second;
466 return nullptr;
467}
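
// Casts of the propagated constant are folded through the constant folder.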
Constant *InstCostVisitor::visitCastInst(CastInst &I) {
  return ConstantFoldCastOperand(I.getOpcode(), LastVisited->second,
                                 I.getType(), DL);
472}

Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  Constant *Const = LastVisited->second;
  bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
  Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
  Constant *Other = findConstantFor(V);

  if (Other) {
    if (ConstOnRHS)
      std::swap(Const, Other);
    return ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
  }

  // Try the lattice value of the operand when a concrete constant is not
  // known.
  ValueLatticeElement ConstLV = ValueLatticeElement::get(Const);
491 const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V);
492 auto &V1State = ConstOnRHS ? OtherLV : ConstLV;
493 auto &V2State = ConstOnRHS ? ConstLV : OtherLV;
494 return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);
495}

Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
}

Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
505
506 bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
  Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
  Constant *Other = findConstantFor(V);
  Value *OtherVal = Other ? Other : V;
  Value *ConstVal = LastVisited->second;
511
  if (ConstOnRHS)
    std::swap(ConstVal, OtherVal);

  return dyn_cast_or_null<Constant>(
      simplifyBinOp(I.getOpcode(), ConstVal, OtherVal, SimplifyQuery(DL)));
517}
518
Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
                                                   CallInst *Call) {
  Value *StoreValue = nullptr;
522 for (auto *User : Alloca->users()) {
523
524
525 if (User == Call)
526 continue;

    if (auto *Store = dyn_cast<StoreInst>(User)) {
530 if (StoreValue || Store->isVolatile())
531 return nullptr;
532 StoreValue = Store->getValueOperand();
533 continue;
534 }
535
536 return nullptr;
537 }
538
539 if (!StoreValue)
540 return nullptr;
541
542 return getCandidateConstant(StoreValue);
543}
544

// A constant stack value is an AllocaInst that has a single constant
// value stored to it. Return this constant if such an alloca stack value
// is a function argument.
Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call,
                                                     Value *Val) {
  if (!Val)
    return nullptr;
  Val = Val->stripPointerCasts();
  if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
    return ConstVal;
  auto *Alloca = dyn_cast<AllocaInst>(Val);
  if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
    return nullptr;
  return getPromotableAlloca(Alloca, Call);
559}
560

// To support specializing recursive functions, it is important to propagate
// constant arguments because after a first iteration of specialisation, a
// reduced example may look like this:
//
//     define internal void @RecursiveFn(i32* arg1) {
//       %temp = alloca i32, align 4
//       store i32 2, i32* %temp, align 4
//       call void @RecursiveFn.1(i32* nonnull %temp)
//       ret void
//     }
//
// Before a next iteration, we need to propagate the constant like so,
// which allows further specialization in the next iterations:
//
//     @specialized.arg.1 = internal constant i32 2
//
//     define internal void @RecursiveFn(i32* arg1) {
//       call void @RecursiveFn.1(i32* nonnull @specialized.arg.1)
//       ret void
//     }
//
// See if there are any new constant values for the callers of \p F via
// stack variables and promote them to global variables.
584void FunctionSpecializer::promoteConstantStackValues(Function *F) {
585 for (User *U : F->users()) {

    auto *Call = dyn_cast<CallInst>(U);
    if (!Call)
      continue;
590
591 if (!Solver.isBlockExecutable(Call->getParent()))
592 continue;
593
    for (const Use &U : Call->args()) {
      unsigned Idx = Call->getArgOperandNo(&U);
      Value *ArgOp = Call->getArgOperand(Idx);

      if (!Call->onlyReadsMemory(Idx) || !ArgOp->getType()->isPointerTy())
        continue;
601
602 auto *ConstVal = getConstantStackValue(Call, ArgOp);
603 if (!ConstVal)
604 continue;
605
      Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
                                     GlobalValue::InternalLinkage, ConstVal,
                                     "specialized.arg." + Twine(++NGlobals));
      Call->setArgOperand(Idx, GV);
    }
611 }
612}
613
/// Remove any ssa_copy intrinsics that may have been introduced.
static void removeSSACopy(Function &F) {
  for (BasicBlock &BB : F) {
    for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
      auto *BC = dyn_cast<BitCastInst>(&Inst);
      if (!BC || BC->getType() != BC->getOperand(0)->getType())
        continue;
622 Inst.replaceAllUsesWith(BC->getOperand(0));
623 Inst.eraseFromParent();
624 }
625 }
626}
627
628
629void FunctionSpecializer::cleanUpSSA() {
  for (Function *F : Specializations)
    removeSSACopy(*F);
}
633

template <> struct llvm::DenseMapInfo<SpecSig> {
  static inline SpecSig getEmptyKey() { return {~0U, {}}; }

  static inline SpecSig getTombstoneKey() { return {~1U, {}}; }

  static unsigned getHashValue(const SpecSig &S) {
    return static_cast<unsigned>(hash_value(S));
  }

  static bool isEqual(const SpecSig &LHS, const SpecSig &RHS) {
645 return LHS == RHS;
646 }
647};

FunctionSpecializer::~FunctionSpecializer() {
  LLVM_DEBUG(
    if (NumSpecsCreated > 0)
      dbgs() << "FnSpecialization: Created " << NumSpecsCreated
653 << " specializations in module " << M.getName() << "\n");
654
655 removeDeadFunctions();
656 cleanUpSSA();
657}
658

/// Get the unsigned Value of given Cost object. Assumes the Cost is
/// always non-negative, which is true for both TCK_CodeSize and TCK_Latency.
static unsigned getCostValue(const Cost &C) {
663 int64_t Value = C.getValue();
664
665 assert(Value >= 0 && "CodeSize and Latency cannot be negative");
666
667
668 return static_cast<unsigned>(Value);
669}
670

/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
/// \returns true if at least one function is specialized.
bool FunctionSpecializer::run() {
  // Find possible specializations for each function.
  SpecMap SM;
  SmallVector<Spec, 32> AllSpecs;
  unsigned NumCandidates = 0;
  for (Function &F : M) {
    if (!isCandidateFunction(&F))
682 continue;
683
    auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
    CodeMetrics &Metrics = It->second;
    if (Inserted) {
      // Calculate the code metrics for the function.
      SmallPtrSet<const Value *, 32> EphValues;
      CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
      for (BasicBlock &BB : F)
        Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
692 }
693
694
695
696 const bool RequireMinSize =
699
700
701
702
    if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
        (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
      continue;
706
707
708
709
710
712 continue;
713
714 int64_t Sz = Metrics.NumInsts.getValue();
715 assert(Sz > 0 && "CodeSize should be positive");
716
717 unsigned FuncSize = static_cast<unsigned>(Sz);
718
719 LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
720 << F.getName() << " is " << FuncSize << "\n");
721
722 if (Inserted && Metrics.isRecursive)
723 promoteConstantStackValues(&F);
724
    if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
      LLVM_DEBUG(
          dbgs() << "FnSpecialization: No possible specializations found for "
728 << F.getName() << "\n");
729 continue;
730 }
731
732 ++NumCandidates;
733 }
734
  if (!NumCandidates) {
    LLVM_DEBUG(
        dbgs()
        << "FnSpecialization: No possible specializations found in module\n");
739 return false;
740 }
741
742
743
744
745 auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
746 if (AllSpecs[I].Score != AllSpecs[J].Score)
747 return AllSpecs[I].Score > AllSpecs[J].Score;
748 return I > J;
749 };
750 const unsigned NSpecs =
      std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
  SmallVector<unsigned> BestSpecs(NSpecs + 1);
  std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
754 if (AllSpecs.size() > NSpecs) {
755 LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
756 << "the maximum number of clones threshold.\n"
757 << "FnSpecialization: Specializing the "
758 << NSpecs
759 << " most profitable candidates.\n");
760 std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore);
761 for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
762 BestSpecs[NSpecs] = I;
763 std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
764 std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
765 }
766 }
767
768 LLVM_DEBUG(dbgs() << "FnSpecialization: List of specializations \n";
769 for (unsigned I = 0; I < NSpecs; ++I) {
770 const Spec &S = AllSpecs[BestSpecs[I]];
771 dbgs() << "FnSpecialization: Function " << S.F->getName()
                          << " , score " << S.Score << "\n";
                   for (const ArgInfo &Arg : S.Sig.Args)
                     dbgs() << "FnSpecialization: FormalArg = "
                            << Arg.Formal->getNameOrAsOperand()
                            << ", ActualArg = "
                            << Arg.Actual->getNameOrAsOperand() << "\n";
778 });

  // Create the chosen specializations.
  SmallPtrSet<Function *, 8> OriginalFuncs;
  SmallVector<Function *> Clones;
783 for (unsigned I = 0; I < NSpecs; ++I) {
784 Spec &S = AllSpecs[BestSpecs[I]];
785
786
787
788 FunctionGrowth[S.F] += S.CodeSize;
789
790 S.Clone = createSpecialization(S.F, S.Sig);
    Function *Clone = S.Clone;

    // Redirect the known call sites to the clone.
    for (CallBase *Call : S.CallSites) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
                        << " to call " << Clone->getName() << "\n");
797 Call->setCalledFunction(S.Clone);
798 auto &BFI = GetBFI(*Call->getFunction());
799 std::optional<uint64_t> Count =
800 BFI.getBlockProfileCount(Call->getParent());
        std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
804 if (MaybeCloneCount) {
805 uint64_t CallCount = *Count + MaybeCloneCount->getCount();
          if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
809 uint64_t OriginalCount = MaybeOriginalCount->getCount();
810 if (OriginalCount >= *Count) {
812 } else {
813
814
816 }
817 }
818 }
819 }
820 }

    Clones.push_back(S.Clone);
    OriginalFuncs.insert(S.F);
824 }
825
826 Solver.solveWhileResolvedUndefsIn(Clones);
827
828
829
830
831 for (Function *F : OriginalFuncs) {
832 auto [Begin, End] = SM[F];
833 updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
834 }

  for (Function *F : Clones) {
    if (F->getReturnType()->isVoidTy())
838 continue;
    if (F->getReturnType()->isStructTy()) {
      auto *STy = cast<StructType>(F->getReturnType());
      if (!Solver.isStructLatticeConstant(F, STy))
842 continue;
843 } else {
844 auto It = Solver.getTrackedRetVals().find(F);
845 assert(It != Solver.getTrackedRetVals().end() &&
             "Return value ought to be tracked");
      if (SCCPSolver::isOverdefined(It->second))
        continue;
849 }
    for (User *U : F->users()) {
      if (auto *CS = dyn_cast<CallBase>(U)) {
        // The user instruction does not call our function.
853 if (CS->getCalledFunction() != F)
854 continue;
855 Solver.resetLatticeValueFor(CS);
856 }
857 }
858 }
859
860
861 Solver.solveWhileResolvedUndefs();
862
863 for (Function *F : OriginalFuncs)
864 if (FunctionMetrics[F].isRecursive)
865 promoteConstantStackValues(F);
866
867 return true;
868}
869
870void FunctionSpecializer::removeDeadFunctions() {
871 for (Function *F : DeadFunctions) {
872 LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
873 << F->getName() << "\n");
    if (FAM)
      FAM->clear(*F, F->getName());
876
877
878
881 "User of dead function must be call or invoke");
885 }
886 F->eraseFromParent();
887 }
888 DeadFunctions.clear();
889}
890
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
  ValueToValueMapTy Mappings;
  Function *Clone = CloneFunction(F, Mappings);
  Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
  removeSSACopy(*Clone);
  return Clone;
899}
900
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                              SmallVectorImpl<Spec> &AllSpecs,
                                              SpecMap &SM) {
905
906
907 DenseMap<SpecSig, unsigned> UniqueSpecs;

  // Get a list of interesting arguments.
  SmallVector<Argument *, 4> Args;
911 for (Argument &Arg : F->args())
912 if (isArgumentInteresting(&Arg))
913 Args.push_back(&Arg);
914
915 if (Args.empty())
916 return false;
917
  for (User *U : F->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto &CS = *cast<CallBase>(U);

923
924 if (CS.getCalledFunction() != F)
925 continue;
926
927
928
929 if (CS.hasFnAttr(Attribute::MinSize))
930 continue;
931
932
933
934 if (!Solver.isBlockExecutable(CS.getParent()))
935 continue;
936
937
938
939 SpecSig S;
940 for (Argument *A : Args) {
941 Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
      if (!C)
943 continue;
944 LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
945 << A->getName() << " : " << C->getNameOrAsOperand()
                        << "\n");
      S.Args.push_back({A, C});
    }
949
950 if (S.Args.empty())
951 continue;
952
953
954 if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
      // An identical specialization already exists. Add the call site to the
      // existing entry, unless the call is recursive: a specialization created
      // because of a recursive call may not end up being the best choice for
      // every clone of this function, so such call sites are updated later.
      if (CS.getFunction() == F)
        continue;
      const unsigned Index = It->second;
      AllSpecs[Index].CallSites.push_back(&CS);
965 } else {
      // Calculate the specialisation gain.
      Cost CodeSize;
      unsigned Score = 0;
      InstCostVisitor Visitor = getInstCostVisitorFor(F);
      for (ArgInfo &A : S.Args) {
        CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual);
        Score += getInliningBonus(A.Formal, A.Actual);
      }
      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();

      unsigned CodeSizeSavings = getCostValue(CodeSize);
      unsigned SpecSize = FuncSize - CodeSizeSavings;
978
979 auto IsProfitable = [&]() -> bool {
        // No check required.
        if (ForceSpecialization)
          return true;
983
        LLVM_DEBUG(
            dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
986 << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");

        // Minimum inlining bonus.
        if (Score > MinInliningBonus * FuncSize / 100)
          return true;
991
993 dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
994 << CodeSizeSavings << " ("
995 << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
996
        // Minimum codesize savings.
        if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100)
          return false;
1000
        // Lazily compute the Latency, to avoid unnecessarily computing BFI.
        unsigned LatencySavings =
            getCostValue(Visitor.getLatencySavingsForKnownConstants());

        LLVM_DEBUG(
            dbgs() << "FnSpecialization: Specialization bonus {Latency = "
1007 << LatencySavings << " ("
1008 << (LatencySavings * 100 / FuncSize) << "%)}\n");
1009
        // Minimum latency savings.
        if (LatencySavings < MinLatencySavings * FuncSize / 100)
          return false;
        // Maximum codesize growth.
        if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
          return false;
1016
1017 Score += std::max(CodeSizeSavings, LatencySavings);
1018 return true;
1019 };
1020
1021
1022 if (!IsProfitable())
1023 continue;
1024
      // Create a new specialisation entry.
      auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
      if (CS.getFunction() != F)
        Spec.CallSites.push_back(&CS);
1029 const unsigned Index = AllSpecs.size() - 1;
1030 UniqueSpecs[S] = Index;
1031 if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
1032 It->second.second = Index + 1;
1033 }
1034 }
1035
1036 return !UniqueSpecs.empty();
1037}
1038
1039bool FunctionSpecializer::isCandidateFunction(Function *F) {
1040 if (F->isDeclaration() || F->arg_empty())
1041 return false;
1042
1043 if (F->hasFnAttribute(Attribute::NoDuplicate))
1044 return false;
1045
1046
1047 if (Specializations.contains(F))
1048 return false;
1049
  // If we're optimizing the function for size, we shouldn't specialize it.
  if (F->hasOptSize() || shouldOptimizeForSize(F, nullptr, nullptr))
    return false;
1053
1054
1055
1056 if (!Solver.isBlockExecutable(&F->getEntryBlock()))
1057 return false;
1058
1059
1060 if (F->hasFnAttribute(Attribute::AlwaysInline))
1061 return false;
1062
1063 LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
1064 << "\n");
1065 return true;
1066}

Function *FunctionSpecializer::createSpecialization(Function *F,
                                                    const SpecSig &S) {
  Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);

  // The original function does not necessarily have internal linkage, but
  // the clone must.
  Clone->setLinkage(GlobalValue::InternalLinkage);

  // Initialize the lattice state of the arguments of the function clone,
  // marking the argument on which we specialized the function constant
  // with the given value.
1082 Solver.setLatticeValueForSpecializationArguments(Clone, S.Args);
1083 Solver.markBlockExecutable(&Clone->front());
1084 Solver.addArgumentTrackedFunction(Clone);
1085 Solver.addTrackedFunction(Clone);
1086
1087
1088 Specializations.insert(Clone);
1089 ++NumSpecsCreated;
1090
1091 return Clone;
1092}
1093

/// Compute the inlining bonus for replacing argument \p A with constant \p C.
/// The below heuristic is only concerned with exposing inlining opportunities
/// via indirect call promotion.
unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
  Function *CalledFunction = dyn_cast<Function>(C);
  if (!CalledFunction)
1101 return 0;
1102
1103
1104 auto &CalleeTTI = (GetTTI)(*CalledFunction);
1105
1106
1107
1108
1109
1110
1111 int InliningBonus = 0;
  for (User *U : A->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto *CS = cast<CallBase>(U);
    if (CS->getCalledOperand() != A)
1117 continue;
1118 if (CS->getFunctionType() != CalledFunction->getFunctionType())
1119 continue;
1120
1121
1122
1123
1124
1125
1126
1127
1128

    InlineParams Params = getInlineParams();
1132 InlineCost IC =
1133 getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);
1134
    // We clamp the bonus for this call to be between zero and the default
    // threshold.
    if (IC.isAlways())
      InliningBonus += Params.DefaultThreshold;
    else if (IC.isVariable() && IC.getCostDelta() > 0)
      InliningBonus += IC.getCostDelta();

1142 LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus
1143 << " for user " << *U << "\n");
1144 }
1145
1146 return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0;
1147}
1148
1149
1150
1151bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
1152
1153 if (A->user_empty())
1154 return false;

  Type *Ty = A->getType();
  if (!Ty->isPointerTy() && (!SpecializeLiteralConstant ||
      (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
    return false;

1161
1162
  if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory())
1164 return false;
1165
1166
1167 if (!Solver.isArgumentTrackedFunction(A->getParent()))
1168 return true;
1169
1170
1171

  bool IsOverdefined = Ty->isStructTy()
                           ? any_of(Solver.getStructLatticeValueFor(A),
                                    SCCPSolver::isOverdefined)
1175 : SCCPSolver::isOverdefined(Solver.getLatticeValueFor(A));
1176
1178 if (IsOverdefined)
1179 dbgs() << "FnSpecialization: Found interesting parameter "
1180 << A->getNameOrAsOperand() << "\n";
1181 else
1182 dbgs() << "FnSpecialization: Nothing to do, parameter "
1183 << A->getNameOrAsOperand() << " is already constant\n";
1184 );
1185 return IsOverdefined;
1186}
1187

// Check if the value \p V (an actual argument) is a constant or can only
// have a constant value. Return that constant.
Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
  if (isa<PoisonValue>(V))
    return nullptr;
1193

  // Select for possible specialisation values that are constants or
  // are deduced to be constants or constant ranges with a single element.
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    C = Solver.getConstantOrNull(V);
1199

  // Don't specialize on (anything derived from) the address of a non-constant
  // global variable, unless explicitly enabled.
  if (C && C->getType()->isPointerTy() && !C->isNullValue())
    if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
        GV && !(GV->isConstant() && SpecializeOnAddress))
      return nullptr;
1206
1207 return C;
1208}
1209
1210void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
1211 const Spec *End) {
  // Collect the call sites that need updating.
  SmallVector<CallBase *> ToUpdate;
  for (User *U : F->users())
    if (auto *CS = dyn_cast<CallBase>(U);
        CS && CS->getCalledFunction() == F &&
        Solver.isBlockExecutable(CS->getParent()))
      ToUpdate.push_back(CS);

1220 unsigned NCallsLeft = ToUpdate.size();
1221 for (CallBase *CS : ToUpdate) {
1222 bool ShouldDecrementCount = CS->getFunction() == F;
1223
1224
1225 const Spec *BestSpec = nullptr;
1226 for (const Spec &S : make_range(Begin, End)) {
1227 if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))
1228 continue;
1229
1230 if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
1231 unsigned ArgNo = Arg.Formal->getArgNo();
1232 return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;
1233 }))
1234 continue;
1235
1236 BestSpec = &S;
1237 }
1238
1239 if (BestSpec) {
1240 LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS
1241 << " to call " << BestSpec->Clone->getName() << "\n");
1242 CS->setCalledFunction(BestSpec->Clone);
1243 ShouldDecrementCount = true;
1244 }
1245
1246 if (ShouldDecrementCount)
1247 --NCallsLeft;
1248 }
1249
1250
1251
1252
1253
1254 if (NCallsLeft == 0 && Solver.isArgumentTrackedFunction(F) &&
      !F->hasAddressTaken()) {
1256 Solver.markFunctionUnreachable(F);
1257 DeadFunctions.insert(F);
1258 }
1259}