LLVM: lib/CodeGen/SelectOptimize.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
40#include
41#include
42#include
43
44using namespace llvm;
46
47#define DEBUG_TYPE "select-optimize"
48
50 "Number of select groups considered for conversion to branch");
51STATISTIC(NumSelectConvertedExpColdOperand,
52 "Number of select groups converted due to expensive cold operand");
54 "Number of select groups converted due to high-predictability");
56 "Number of select groups not converted due to unpredictability");
58 "Number of select groups not converted due to cold basic block");
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted, "Number of selects converted");
62
64 "cold-operand-threshold",
65 cl::desc("Maximum frequency of path for an operand to be considered cold."),
67
69 "cold-operand-max-cost-multiplier",
70 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
71 "slice of a cold operand to be considered inexpensive."),
73
76 cl::desc("Gradient gain threshold (%)."),
78
81 cl::desc("Minimum gain per loop (in cycles) threshold."),
83
85 "select-opti-loop-relative-gain-threshold",
87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
89
92 cl::desc("Default mispredict rate (initialized to 25%)."));
93
97 cl::desc("Disable loop-level heuristics."));
98
99namespace {
100
// NOTE(review): This listing is a doxygen/HTML text extraction; template
// arguments (e.g. dyn_cast<SelectInst>), several member declarations, and all
// original comments were dropped, and each line carries its original source
// line number as a leading integer. Comments below state only what the
// surviving tokens show — verify against upstream
// llvm/lib/CodeGen/SelectOptimize.cpp before relying on them.
//
// Core implementation of the select-optimize transformation: collects groups
// of select(-like) instructions and, when heuristics deem it profitable,
// converts them to explicit branches.
101class SelectOptimizeImpl {
106 const LoopInfo *LI = nullptr;
111
112public:
113 SelectOptimizeImpl() = default;
114 SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
117
119
// Per-instruction cost pair used by the loop heuristics: cost when the
// select stays predicated (PredCost) vs. when it is turned into a branch
// (NonPredCost), in Scaled64 fixed-point units.
120 struct CostInfo {
121
122 Scaled64 PredCost;
123
124 Scaled64 NonPredCost;
125 };
126
127
128
129
// Wrapper over either a real SelectInst or a "select-like" instruction
// (the BinaryOperator path in getFalseValue below). `Inverted` swaps the
// roles of the true/false values; `CondIdx` records which operand of a
// select-like binary op carries the condition.
130 class SelectLike {
131
133
134
135 bool Inverted = false;
136
137
138
139 unsigned CondIdx;
140
141 public:
142 SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
143 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
144
146 const Instruction *getI() const { return I; }
147
148 Type *getType() const { return I->getType(); }
149
150 unsigned getConditionOpIndex() { return CondIdx; };
151
152
153
154
155
// True-side value. Honors `Inverted` by delegating to getFalseValue.
// The isa<> target below lost its template argument in extraction —
// presumably the select-like (non-SelectInst) case, which has no
// materialized true value and yields nullptr; confirm upstream.
156 Value *getTrueValue(bool HonorInverts = true) const {
157 if (Inverted && HonorInverts)
158 return getFalseValue(false);
159 if (auto *Sel = dyn_cast(I))
160 return Sel->getTrueValue();
161
162
163 if (isa(I))
164 return nullptr;
165
167 }
168
169
170
171
// False-side value. For the select-like BinaryOperator case this is the
// operand that is not the condition (1 - CondIdx).
172 Value *getFalseValue(bool HonorInverts = true) const {
173 if (Inverted && HonorInverts)
174 return getTrueValue(false);
175 if (auto *Sel = dyn_cast(I))
176 return Sel->getFalseValue();
177
178
179
180 if (auto *BO = dyn_cast(I))
181 return BO->getOperand(1 - CondIdx);
182
184 }
185
186
187
188
// Cost of the chosen operand once the select is lowered to a branch:
// returns the cached NonPredCost when the operand is an instruction in
// InstCostMap; otherwise falls through to a TTI-based estimate (the TTI
// call itself was partially lost in extraction).
189 Scaled64 getOpCostOnBranch(
192 auto *V = IsTrue ? getTrueValue() : getFalseValue();
193 if (V) {
194 if (auto *IV = dyn_cast(V)) {
195 auto It = InstCostMap.find(IV);
196 return It != InstCostMap.end() ? It->second.NonPredCost
198 }
200 }
201
202
203
204
207 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
208 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
210 if (auto *OpI = dyn_cast(I->getOperand(1 - CondIdx))) {
211 auto It = InstCostMap.find(OpI);
212 if (It != InstCostMap.end())
213 TotalCost += It->second.NonPredCost;
214 }
215 return TotalCost;
216 }
217 };
218
219private:
220
221
222
// A group of select-like instructions that share one condition value.
223 struct SelectGroup {
224 Value *Condition;
226 };
228
229
230
// Driver: collect, analyze, and convert profitable groups (see the
// out-of-line definitions below for the visible behavior of each).
231 bool optimizeSelects(Function &F);
232
233
234
235
236
237 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
238 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
239
240
241
242 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
243
244
245 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
246
247
248
249 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
250 SelectGroups &ProfSIGroups);
251 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
252 SelectGroups &ProfSIGroups);
253
254
255
256 bool isConvertToBranchProfitableBase(const SelectGroup &ASI);
257
258
259
260
261 bool hasExpensiveColdOperand(const SelectGroup &ASI);
262
263
264
265
266 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
267 Instruction *SI, bool ForSinking = false);
268
269
270 bool isSelectHighlyPredictable(const SelectLike SI);
271
272
273
274 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
275
276
277
278 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
280 CostInfo *LoopCost);
281
282
284 getSImap(const SelectGroups &SIGroups);
285
286
287
289 getSGmap(const SelectGroups &SIGroups);
290
291
292 std::optional<uint64_t> computeInstCost(const Instruction *I);
293
294
295 Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);
296
297
298 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299 const SelectLike SI);
300
301
302 bool isSelectKindSupported(const SelectLike SI);
303};
304
306 SelectOptimizeImpl Impl;
307
308public:
309 static char ID;
310
313 }
314
316 return Impl.runOnFunction(F, *this);
317 }
318
326 }
327};
328
329}
330
333 SelectOptimizeImpl Impl(TM);
335}
336
// Pass identification token for the legacy pass manager (address, not
// value, is what identifies the pass).
337char SelectOptimize::ID = 0;
338
340 false)
349
351
354 TSI = TM->getSubtargetImpl(F);
356
357
358
359
364
368
370 .getCachedResult(*F.getParent());
371 assert(PSI && "This pass requires module analysis pass `profile-summary`!");
373
374
377
380 TSchedModel.init(TSI);
381
382 bool Changed = optimizeSelects(F);
384}
385
// Legacy-pass entry point: initializes per-function analyses/subtarget
// state and then runs the conversion. NOTE(review): the conditions guarding
// the three early `return false` exits (lines 394-412 of the original) were
// dropped by the doxygen extraction — presumably optnone/TTI/profile
// applicability checks; confirm against upstream SelectOptimize.cpp.
386bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
388 TSI = TM->getSubtargetImpl(F);
390
391
392
393
397 return false;
398
400
402 return false;
403
// Target scheduling model drives the latency-based cost computations.
408 TSchedModel.init(TSI);
409
410
412 return false;
413
414 return optimizeSelects(F);
415}
416
417bool SelectOptimizeImpl::optimizeSelects(Function &F) {
418
419 SelectGroups ProfSIGroups;
420
421 optimizeSelectsBase(F, ProfSIGroups);
422
423 optimizeSelectsInnerLoops(F, ProfSIGroups);
424
425
426
427 convertProfitableSIGroups(ProfSIGroups);
428
429
430 return !ProfSIGroups.empty();
431}
432
// Collect select groups from blocks NOT in an innermost loop (those are
// handled by optimizeSelectsInnerLoops) and apply the base profitability
// heuristic. NOTE(review): the per-basic-block loop header and the
// `Loop *L = LI->getLoopFor(&BB)` line (original lines 437/439) were
// dropped by the extraction.
433void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
434 SelectGroups &ProfSIGroups) {
435
436 SelectGroups SIGroups;
438
// Skip blocks that belong to an innermost loop.
440 if (L && L->isInnermost())
441 continue;
442 collectSelectGroups(BB, SIGroups);
443 }
444
445
446 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
447}
448
// Analyze selects inside innermost loops with the loop-level cost model.
// The Loops worklist is expanded in place: indexing while push_back-ing
// sub-loops performs a breadth-first walk of the whole loop tree.
// NOTE(review): the worklist initialization, the outer `for (Loop *L : ...)`
// header, the `!L` in `->isInnermost()`, and the per-block loop header were
// dropped by the extraction.
449void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
450 SelectGroups &ProfSIGroups) {
452
453 for (unsigned long i = 0; i < Loops.size(); ++i)
454 for (Loop *ChildL : Loops[i]->getSubLoops())
455 Loops.push_back(ChildL);
456
458 if (->isInnermost())
459 continue;
460
461 SelectGroups SIGroups;
463 collectSelectGroups(*BB, SIGroups);
464
465 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
466 }
467}
468
469
470
471
472
473
474
475
476
477
478
479
481 SelectOptimizeImpl::SelectLike &SI, bool isTrue,
484 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
485 if (V) {
486 auto *IV = dyn_cast(V);
487 if (IV && OptSelects.count(IV))
488 return isTrue ? OptSelects[IV].first : OptSelects[IV].second;
489 return V;
490 }
491
492 auto *BO = cast(SI.getI());
493 assert((BO->getOpcode() == Instruction::Add ||
494 BO->getOpcode() == Instruction::Or ||
495 BO->getOpcode() == Instruction::Sub) &&
496 "Only currently handling Add, Or and Sub binary operators.");
497
498 auto *CBO = BO->clone();
499 auto CondIdx = SI.getConditionOpIndex();
500 auto *AuxI = cast(CBO->getOperand(CondIdx));
501 if (isa(AuxI) || isa(AuxI)) {
502 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
503 } else {
504 assert((isa(AuxI) || isa(AuxI)) &&
505 "Unexpected opcode");
506 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));
507 }
508
509 unsigned OtherIdx = 1 - CondIdx;
510 if (auto *IV = dyn_cast(CBO->getOperand(OtherIdx))) {
511 if (OptSelects.count(IV))
512 CBO->setOperand(OtherIdx,
513 isTrue ? OptSelects[IV].first : OptSelects[IV].second);
514 }
515 CBO->insertBefore(B->getTerminator());
516 return CBO;
517}
518
// Lower each profitable select group to explicit control flow: split the
// block at the group, optionally create true/false side blocks into which
// the cold operands' dependence slices are sunk, branch on a frozen copy of
// the condition, and replace every select with a PHI in the merge block.
// NOTE(review): this listing is a doxygen extraction — the large comment
// banner (original lines 521-558), several declarations (TrueSlices,
// FalseSlices, EndBlock, the IRBuilder `IB`, the PHI creation), and various
// statements were dropped; the leading integers are original line numbers.
// Verify details against upstream SelectOptimize.cpp.
519void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
520 for (SelectGroup &ASI : ProfSIGroups) {
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
// Gather the sinkable backward slices of the true/false operands and
// remember the deepest slice on each side.
560 typedef std::stack<Instruction *>::size_type StackSizeType;
561 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
562 for (SelectLike &SI : ASI.Selects) {
563 if (!isa(SI.getI()))
564 continue;
565
566
567 if (auto *TI = dyn_cast_or_null(SI.getTrueValue())) {
568 std::stack<Instruction *> TrueSlice;
569 getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
570 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
572 }
573 if (auto *FI = dyn_cast_or_null(SI.getFalseValue())) {
574 if (isa(SI.getI()) || !FI->hasOneUse()) {
575 std::stack<Instruction *> FalseSlice;
576 getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
577 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
578 FalseSlices.push_back(FalseSlice);
579 }
580 }
581 }
582
583
584
585
586
587
588
589
590
591
// Interleave the slices round-robin (popping each stack in turn) so
// independent chains can issue in parallel after sinking.
593 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
594 for (auto &S : TrueSlices) {
595 if (!S.empty()) {
596 TrueSlicesInterleaved.push_back(S.top());
597 S.pop();
598 }
599 }
600 }
601 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
602 for (auto &S : FalseSlices) {
603 if (!S.empty()) {
604 FalseSlicesInterleaved.push_back(S.top());
605 S.pop();
606 }
607 }
608 }
609
// Split the start block right at the first select of the group.
610 SelectLike &SI = ASI.Selects.front();
611 SelectLike &LastSI = ASI.Selects.back();
612 BasicBlock *StartBlock = SI.getI()->getParent();
615
616
617
618
619
620 SplitPt.setHeadBit(true);
// Keep the block-frequency info consistent for the merge block.
622 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
623
625
626
627
// Move any non-select instructions interleaved between the group's
// first and last selects out of the way (into SinkInstrs).
629 auto DIt = SI.getI()->getIterator();
630 auto NIt = ASI.Selects.begin();
631 while (&*DIt != LastSI.getI()) {
632 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
633 ++NIt;
634 else
636 DIt++;
637 }
639 for (auto *DI : SinkInstrs)
641
642
643
644 auto TransferDbgRecords = [&](Instruction &I) {
650 }
651 };
652
653
654
655
656 auto R = make_range(std::next(SI.getI()->getIterator()),
657 std::next(LastSI.getI()->getIterator()));
659
660
661
662 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
663 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
664
// A "select-like" member reports nullptr for the side it does not
// materialize; such groups need the corresponding side block.
665 auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
666 for (auto &SL : SG.Selects) {
667 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
668 return true;
669 }
670 return false;
671 };
672 if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
674 EndBlock->getParent(), EndBlock);
676 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
677 for (Instruction *TrueInst : TrueSlicesInterleaved)
678 TrueInst->moveBefore(TrueBranch);
679 }
680 if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
681 FalseBlock =
683 EndBlock->getParent(), EndBlock);
685 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
686 for (Instruction *FalseInst : FalseSlicesInterleaved)
687 FalseInst->moveBefore(FalseBranch);
688 }
689
// Neither side needed a block: create a false block so we still have a
// diamond to branch over.
690
691 if (TrueBlock == FalseBlock) {
692 assert(TrueBlock == nullptr &&
693 "Unexpected basic block transform while optimizing select");
694
696 EndBlock->getParent(), EndBlock);
698 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
699 }
700
701
702
703
704
705
// Select the branch targets; a missing side falls through to EndBlock
// and its PHI incoming block becomes StartBlock.
707 if (TrueBlock == nullptr) {
708 TT = EndBlock;
709 FT = FalseBlock;
710 TrueBlock = StartBlock;
711 } else if (FalseBlock == nullptr) {
712 TT = TrueBlock;
713 FT = EndBlock;
714 FalseBlock = StartBlock;
715 } else {
716 TT = TrueBlock;
717 FT = FalseBlock;
718 }
// Freeze the condition so branching on it twice cannot observe two
// different values of a poison/undef condition.
720 auto *CondFr =
721 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen")
722
724
725
726
727
// Replace each select with a PHI merging the per-side values; other
// groups whose condition was this select now key off the PHI.
729 for (SelectLike &SI : ASI.Selects) {
730
734
735
737 for (auto &SG : ProfSIGroups) {
738 if (SG.Condition == SI.getI())
739 SG.Condition = PN;
740 }
741 }
742 SI.getI()->replaceAllUsesWith(PN);
745 INS[PN] = {TV, FV};
749 ++NumSelectsConverted;
750 }
751 IB.CreateCondBr(CondFr, TT, FT, SI.getI());
752
753
754 for (SelectLike &SI : ASI.Selects)
755 SI.getI()->eraseFromParent();
756 }
757}
758
// Scan one basic block and group consecutive select-like instructions that
// share the same condition into SelectGroups. NOTE(review): doxygen
// extraction dropped the match() patterns, several declarations (SelectInfo
// map, SeenCmp, BBIt, Cond), and most comments; leading integers are the
// original line numbers. Verify against upstream SelectOptimize.cpp.
759void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
760 SelectGroups &SIGroups) {
761
762
763
764
765
766
767
768
769
// Classification record for each recognized instruction: its condition,
// whether it is only auxiliary to another select-like op, whether the
// true/false sides are inverted, and which operand is the condition.
770 struct SelectLikeInfo {
772 bool IsAuxiliary;
773 bool IsInverted;
774 unsigned ConditionIdx;
775 };
776
778
779
781
782
783
784
// Classifies `I` and caches the result in SelectInfo; returns
// SelectInfo.end() for instructions that are not select-like.
785 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
786 if (auto *Cmp = dyn_cast(I)) {
788 return SelectInfo.end();
789 }
790
793 Cond->getType()->isIntegerTy(1)) {
795 return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
796 }
797
799 return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
800 }
801
802
805 return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
806 }
// Sign-bit smear (ashr by bitwidth-1) matched against a previously seen
// compare with 0/-1 — treated as an auxiliary all-ones/all-zeros mask.
810 I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
811 for (auto *CmpI : SeenCmp) {
812 auto Pred = CmpI->getPredicate();
813 if (Val != CmpI->getOperand(0))
814 continue;
816 match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
822 match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
823 bool Inverted =
825 return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
826 }
827 }
828 return SelectInfo.end();
829 }
830
831
832
833
834
836 auto MatchZExtOrSExtPattern =
838 auto MatchShiftPattern =
840
841
// Add/Sub/Or of a zext/sext(i1) or smeared mask is select-like;
// i1 `or` is excluded (it is just boolean logic, not a select).
842
843 if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
844 (match(I, MatchShiftPattern) &&
845 X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
846 if (I->getOpcode() != Instruction::Add &&
847 I->getOpcode() != Instruction::Sub &&
848 I->getOpcode() != Instruction::Or)
849 return SelectInfo.end();
850
851 if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
852 return SelectInfo.end();
853
854
855
856
857
// For Sub the condition must be the RHS; otherwise operand 0.
858 unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
860 auto *Op = I->getOperand(Idx);
861 auto It = SelectInfo.find(Op);
862 if (It != SelectInfo.end() && It->second.IsAuxiliary) {
863 Cond = It->second.Cond;
864 bool Inverted = It->second.IsInverted;
865 return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
866 }
867 }
868 }
869 return SelectInfo.end();
870 };
871
// Main scan: start a group at each non-auxiliary select-like instruction
// and extend it while subsequent instructions share the same condition.
872 bool AlreadyProcessed = false;
875 while (BBIt != BB.end()) {
877 if (I->isDebugOrPseudoInst())
878 continue;
879
880 if (!AlreadyProcessed)
881 It = ProcessSelectInfo(I);
882 else
883 AlreadyProcessed = false;
884
885 if (It == SelectInfo.end() || It->second.IsAuxiliary)
886 continue;
887
889 continue;
890
892
893 if (->getType()->isIntegerTy(1))
894 continue;
895
896 SelectGroup SIGroup = {Cond, {}};
897 SIGroup.Selects.emplace_back(I, It->second.IsInverted,
898 It->second.ConditionIdx);
899
900
901
902 if (!isSelectKindSupported(SIGroup.Selects.front()))
903 continue;
904
905 while (BBIt != BB.end()) {
907
908
910 ++BBIt;
911 continue;
912 }
913
914 It = ProcessSelectInfo(NI);
915 if (It == SelectInfo.end()) {
916 AlreadyProcessed = true;
917 break;
918 }
919
// A different condition terminates the current group.
920
921 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
922 if (Cond != CurrCond) {
923 AlreadyProcessed = true;
924 break;
925 }
926
927 if (!IsAux)
928 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
929 ++BBIt;
930 }
932 dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
933 for (auto &SI : SIGroup.Selects)
934 dbgs() << " " << *SI.getI() << "\n";
935 });
936
937 SIGroups.push_back(SIGroup);
938 }
939}
940
941void SelectOptimizeImpl::findProfitableSIGroupsBase(
942 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
943 for (SelectGroup &ASI : SIGroups) {
944 ++NumSelectOptAnalyzed;
945 if (isConvertToBranchProfitableBase(ASI))
946 ProfSIGroups.push_back(ASI);
947 }
948}
949
953 ORE->emit(Rem);
954}
955
// Loop-level profitability: compute per-instruction costs over the loop,
// run the loop heuristics, then keep each group whose worst-case branch
// cost beats its worst-case select cost. NOTE(review): the declaration of
// InstCostMap (original line 969) and the remark constructors were dropped
// by the extraction.
956void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
957 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
958 NumSelectOptAnalyzed += SIGroups.size();
959
960
961
962
963
964
965
966
967
968
// LoopCost[0]/[1] model two consecutive loop iterations.
970 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
971 {Scaled64::getZero(), Scaled64::getZero()}};
972 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
973 !checkLoopHeuristics(L, LoopCost)) {
974 return;
975 }
976
977 for (SelectGroup &ASI : SIGroups) {
978
// A group's cost is the max over its members on each path.
979
980 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
981 for (SelectLike &SI : ASI.Selects) {
982 SelectCost = std::max(SelectCost, InstCostMap[SI.getI()].PredCost);
983 BranchCost = std::max(BranchCost, InstCostMap[SI.getI()].NonPredCost);
984 }
985 if (BranchCost < SelectCost) {
987 ASI.Selects.front().getI());
988 OR << "Profitable to convert to branch (loop analysis). BranchCost="
989 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
990 << ". ";
992 ++NumSelectConvertedLoop;
993 ProfSIGroups.push_back(ASI);
994 } else {
996 ASI.Selects.front().getI());
997 ORmiss << "Select is more profitable (loop analysis). BranchCost="
998 << BranchCost.toString()
999 << ", SelectCost=" << SelectCost.toString() << ". ";
1001 }
1002 }
1003}
1004
// Base heuristic chain: reject cold blocks and explicitly unpredictable
// selects; accept highly predictable selects, expensive cold operands, or
// big groups in an outer-loop latch block. NOTE(review): the OR/ORmiss
// remark declarations (original lines 1010-1011), the predictability
// condition at 1031, and `LI->getLoopFor(BB)` at 1051 were dropped; the
// `->isInnermost()` at 1052 lost its `!L` prefix in extraction.
1005bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1006 const SelectGroup &ASI) {
1007 const SelectLike &SI = ASI.Selects.front();
1008 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
1009 << "\n");
1012
// Heuristic 1: keep selects in profile-cold blocks.
1013
1014 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
1015 ++NumSelectColdBB;
1016 ORmiss << "Not converted to branch because of cold basic block. ";
1018 return false;
1019 }
1020
// Heuristic 2: respect !unpredictable metadata.
1021
1022 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1023 ++NumSelectUnPred;
1024 ORmiss << "Not converted to branch because of unpredictable branch. ";
1026 return false;
1027 }
1028
// Heuristic 3: convert highly predictable selects (condition dropped in
// extraction — presumably isSelectHighlyPredictable-based; confirm).
1029
1030
1032 ++NumSelectConvertedHighPred;
1033 OR << "Converted to branch because of highly predictable branch. ";
1035 return true;
1036 }
1037
// Heuristic 4: convert when a cold operand has an expensive slice.
1038
1039
1040 if (hasExpensiveColdOperand(ASI)) {
1041 ++NumSelectConvertedExpColdOperand;
1042 OR << "Converted to branch because of expensive cold operand.";
1044 return true;
1045 }
1046
// Heuristic 5: big select groups in a non-innermost loop latch.
1047
1048
1049
1050 auto *BB = SI.getI()->getParent();
1052 if (L && ->isInnermost() && L->getLoopLatch() == BB &&
1053 ASI.Selects.size() >= 3) {
1054 OR << "Converted to branch because select group in the latch block is big.";
1056 return true;
1057 }
1058
1059 ORmiss << "Not profitable to convert to branch (base heuristic).";
1061 return false;
1062}
1063
1066 return (Numerator + (Denominator / 2)) / Denominator;
1067}
1068
1071 if (isa(SI.getI()))
1073 return false;
1074}
1075
// True when one side of the select is cold per branch weights AND the cold
// operand's one-use backward slice is expensive enough (weighted by the hot
// probability) to be worth branching over. NOTE(review): the branch-weight
// extraction call, the ColdOperand threshold test, the SliceCost
// accumulation body, and the threshold comparison operand were dropped by
// the extraction.
1076bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
1077 bool ColdOperand = false;
1078 uint64_t TrueWeight, FalseWeight, TotalWeight;
1080 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1081 TotalWeight = TrueWeight + FalseWeight;
1082
1086 ASI.Selects.front().getI());
1087 ORmiss << "Profile data available but missing branch-weights metadata for "
1088 "select instruction. ";
1090 }
1091 if (!ColdOperand)
1092 return false;
1093
// The cold side is the one with the smaller weight; its slice cost is
// scaled by the hot weight before comparison against the threshold.
1094
1095 for (SelectLike SI : ASI.Selects) {
1098 if (TrueWeight < FalseWeight) {
1099 ColdI = dyn_cast_or_null(SI.getTrueValue());
1100 HotWeight = FalseWeight;
1101 } else {
1102 ColdI = dyn_cast_or_null(SI.getFalseValue());
1103 HotWeight = TrueWeight;
1104 }
1105 if (ColdI) {
1106 std::stack<Instruction *> ColdSlice;
1107 getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
1109 while (!ColdSlice.empty()) {
1112 ColdSlice.pop();
1113 }
1114
1115
1116
1117
1119 divideNearest(SliceCost * HotWeight, TotalWeight);
1120 if (AdjSliceCost >=
1122 return true;
1123 }
1124 }
1125 return false;
1126}
1127
1128
1129
1130
1132
1133 if (LoadI->getParent() != SI->getParent())
1134 return false;
1136 while (&*It != SI) {
1137 if (It->mayWriteToMemory())
1138 return false;
1139 It++;
1140 }
1141 return true;
1142}
1143
1144
1145
1146
1147
1148
1149
// BFS over the operands of I collecting its exclusive (single-use)
// backward dependence slice, stopping at instructions that are reused
// elsewhere, unsafe to sink (when ForSinking), or in colder blocks.
// NOTE(review): the Visited-set declaration, `Instruction *II =
// Worklist.front()`, the `!II` in `->hasOneUse()`, and two filter
// conditions (original lines 1172, 1179) were dropped by the extraction.
1150void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
1151 std::stack<Instruction *> &Slice,
1153 bool ForSinking) {
1155 std::queue<Instruction *> Worklist;
1156 Worklist.push(I);
1157 while (!Worklist.empty()) {
1159 Worklist.pop();
1160
// Each instruction is considered at most once.
1161
1162 if (!Visited.insert(II).second)
1163 continue;
1164
// Only single-use instructions belong to an *exclusive* slice.
1165 if (->hasOneUse())
1166 continue;
1167
1168
1169
1170
1171 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
1173 continue;
1174
1175
1176
1177
1178
1180 continue;
1181
// Do not pull work from colder blocks into a hotter one.
1182
1183
1184 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
1185 continue;
1186
1187
1188 Slice.push(II);
1189
1190
1191 for (Value *Op : II->operand_values())
1192 if (auto *OpI = dyn_cast(Op))
1193 Worklist.push(OpI);
1194 }
1195}
1196
// True when branch weights show one side dominating. NOTE(review): the
// weight-extraction call (original line 1199) and the probability
// comparison against the predictability threshold (lines 1203-1204) were
// dropped by the extraction.
1197bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
1198 uint64_t TrueWeight, FalseWeight;
1200 uint64_t Max = std::max(TrueWeight, FalseWeight);
1201 uint64_t Sum = TrueWeight + FalseWeight;
// Guard against division by zero when both weights are zero.
1202 if (Sum != 0) {
1205 return true;
1206 }
1207 }
1208 return false;
1209}
1210
// Decide whether converting the loop's selects reduces the critical path
// enough: requires an absolute reduction, then a minimum relative gain,
// then (when gains grow across the two modeled iterations) a minimum
// gradient gain. NOTE(review): the DisableLoopLevelHeuristics early-out
// condition and the two threshold comparisons (original lines 1216,
// 1236-1237, 1255) were dropped by the extraction; thresholds presumably
// come from the GainCycleThreshold / GainRelativeThreshold /
// GainGradientThreshold cl::opts listed in the doxygen tail.
1211bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
1212 const CostInfo LoopCost[2]) {
1213
1214
1215
1217 return true;
1218
1220 L->getHeader()->getFirstNonPHI());
1221
// Branch form must not lengthen the critical path of either iteration.
1222 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1223 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1224 ORmissL << "No select conversion in the loop due to no reduction of loop's "
1225 "critical path. ";
1227 return false;
1228 }
1229
// Gain per modeled iteration = predicated cost minus branch cost.
1230 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1231 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1232
1233
1234
1235
1238 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1239 ORmissL << "No select conversion in the loop due to small reduction of "
1240 "loop's critical path. Gain="
1241 << Gain[1].toString()
1242 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
1244 return false;
1245 }
1246
// If the gain keeps growing between iterations, require the growth rate
// (gradient) to clear its own threshold as well.
1247
1248
1249
1250
1251
1252 if (Gain[1] > Gain[0]) {
1253 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1254 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1256 ORmissL << "No select conversion in the loop due to small gradient gain. "
1257 "GradientGain="
1258 << GradientGain.toString() << "%. ";
1260 return false;
1261 }
1262 }
1263
// A shrinking gain across iterations is outright unprofitable.
1264 else if (Gain[1] < Gain[0]) {
1265 ORmissL
1266 << "No select conversion in the loop due to negative gradient gain. ";
1268 return false;
1269 }
1270
1271
1272
1273 return true;
1274}
1275
1276
1277
1278
1279
// Two-iteration dependence-aware cost model of the loop: for each
// instruction, its cost on a path is its own latency plus the max cost of
// its in-loop operands; select-like instructions additionally get a
// branch-form cost of predicted-path + misprediction. Fails (returns
// false) when any instruction has no valid latency. NOTE(review): the
// per-block/per-instruction loop headers and the SelectLike lookup
// (original lines 1293-1294, 1332) were dropped by the extraction.
1280bool SelectOptimizeImpl::computeLoopCosts(
1281 const Loop *L, const SelectGroups &SIGroups,
1283 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
1284 << L->getHeader()->getName() << "\n");
1285 const auto SImap = getSImap(SIGroups);
1286 const auto SGmap = getSGmap(SIGroups);
1287
// Two passes model the steady state across consecutive iterations.
1288
1289 const unsigned Iterations = 2;
1290 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
1291
1292 CostInfo &MaxCost = LoopCost[Iter];
1295 if (I.isDebugOrPseudoInst())
1296 continue;
1297
1298 Scaled64 IPredCost = Scaled64::getZero(),
1299 INonPredCost = Scaled64::getZero();
1300
// Start from the most expensive already-costed operand (dependence
// chain propagation).
1301
1302
1303
1304 for (const Use &U : I.operands()) {
1305 auto UI = dyn_cast(U.get());
1306 if (!UI)
1307 continue;
1308 if (InstCostMap.count(UI)) {
1309 IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
1310 INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
1311 }
1312 }
1313 auto ILatency = computeInstCost(&I);
1314 if (!ILatency) {
1316 ORmissL << "Invalid instruction cost preventing analysis and "
1317 "optimization of the inner-most loop containing this "
1318 "instruction. ";
1320 return false;
1321 }
1322 IPredCost += Scaled64::get(*ILatency);
1323 INonPredCost += Scaled64::get(*ILatency);
1324
// For a select of this group, the branch-form cost is the
// weight-predicted path cost plus the expected misprediction cost
// (which itself depends on the condition's cost).
1325
1326
1327
1328
1329
1330
1331 if (SImap.contains(&I)) {
1333 const auto *SG = SGmap.at(&I);
1334 Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
1335 Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
1336 Scaled64 PredictedPathCost =
1337 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1338
1339 Scaled64 CondCost = Scaled64::getZero();
1340 if (auto *CI = dyn_cast(SG->Condition))
1341 if (InstCostMap.count(CI))
1342 CondCost = InstCostMap[CI].NonPredCost;
1343 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1344
1345 INonPredCost = PredictedPathCost + MispredictCost;
1346 }
1347 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
1348 << INonPredCost << " for " << I << "\n");
1349
// The loop's cost on each path is the max over all instructions.
1350 InstCostMap[&I] = {IPredCost, INonPredCost};
1351 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1352 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1353 }
1354 }
1356 << " MaxCost = " << MaxCost.PredCost << " "
1357 << MaxCost.NonPredCost << "\n");
1358 }
1359 return true;
1360}
1361
// Build a map from each select's Instruction* to its SelectLike wrapper
// across all groups. NOTE(review): the return-type line and the map-insert
// statement were dropped by the extraction.
1363SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
1365 for (const SelectGroup &ASI : SIGroups)
1366 for (const SelectLike &SI : ASI.Selects)
1368 return SImap;
1369}
1370
// Build a map from each select's Instruction* to its owning SelectGroup
// across all groups. NOTE(review): the return-type line and the map-insert
// statement were dropped by the extraction; the local is named SImap like
// in getSImap above.
1372SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
1374 for (const SelectGroup &ASI : SIGroups)
1375 for (const SelectLike &SI : ASI.Selects)
1377 return SImap;
1378}
1379
// Latency of one instruction as an integer, or nullopt when the target
// reports no valid cost. NOTE(review): the TTI::getInstructionCost call
// producing `ICost` (original lines 1382-1383) was dropped by the
// extraction.
1380std::optional<uint64_t>
1381SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1384 if (auto OC = ICost.getValue())
1385 return std::optional<uint64_t>(*OC);
1386 return std::nullopt;
1387}
1388
// Expected misprediction penalty: max(target penalty, condition cost)
// scaled by the mispredict rate (a percentage, hence the /100). A highly
// predictable select is modeled as never mispredicting. NOTE(review): the
// return-type line and the MispredictPenalty/MispredictRate initializers
// (presumably from TSchedModel and the mispredict-default-rate cl::opt in
// the doxygen tail) were dropped by the extraction.
1390SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
1391 const Scaled64 CondCost) {
1393
1394
1395
1397
1398
1399 if (isSelectHighlyPredictable(SI))
1400 MispredictRate = 0;
1401
1402
1403
1404
1405 Scaled64 MispredictCost =
1406 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1407 Scaled64::get(MispredictRate);
// Rate is expressed in percent.
1408 MispredictCost /= Scaled64::get(100);
1409
1410 return MispredictCost;
1411}
1412
1413
1414
// Expected cost of the taken path: the branch-weight-weighted average when
// weights are available and nonzero; otherwise a pessimistic 75/25 split
// biased toward the more expensive side ((3*max-side + other)/4).
// NOTE(review): the return-type line and the branch-weight extraction call
// (original lines 1415, 1420) were dropped by the extraction.
1416SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1417 const SelectLike SI) {
1418 Scaled64 PredPathCost;
1419 uint64_t TrueWeight, FalseWeight;
1421 uint64_t SumWeight = TrueWeight + FalseWeight;
// Guard against division by zero for all-zero weights.
1422 if (SumWeight != 0) {
1423 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1424 FalseCost * Scaled64::get(FalseWeight);
1425 PredPathCost /= Scaled64::get(SumWeight);
1426 return PredPathCost;
1427 }
1428 }
1429
1430
1431 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1432 FalseCost * Scaled64::get(3) + TrueCost);
1433 PredPathCost /= Scaled64::get(4);
1434 return PredPathCost;
1435}
1436
// Asks the target whether this select kind is worth optimizing,
// distinguishing vector from scalar selects. NOTE(review): the
// SelectSupportKind computation and both return statements (original lines
// 1438, 1440, 1442-1443) were dropped by the extraction — presumably a
// TLI/TTI isSelectSupported query; confirm upstream.
1437bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
1439 if (SI.getType()->isVectorTy())
1441 else
1444}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI; otherwise, return the false value of SI.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file contains the declaration of the SelectOptimizePass class; its corresponding pass name is select-optimize.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static const uint32_t IV[8]
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ ICMP_SGE
signed greater or equal
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This class represents an Operation in the Expression.
Base class for non-instruction debug metadata records that have positions within IR.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR passes.
std::string getMsg() const
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR unit.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application-load time.
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting it to a conditional branch.
@ TCK_Latency
The latency of instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and run.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor, it is very likely to be predicted correctly.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
OneUse_match< T > m_OneUse(const T &SubPattern)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void initializeSelectOptimizePass(PassRegistry &)
unsigned MispredictPenalty