LLVM: lib/CodeGen/SelectOptimize.cpp Source File

#include <algorithm>
#include <queue>
#include <stack>

using namespace llvm;

#define DEBUG_TYPE "select-optimize"

STATISTIC(NumSelectOptAnalyzed,
          "Number of select groups considered for conversion to branch");
STATISTIC(NumSelectConvertedExpColdOperand,
          "Number of select groups converted due to expensive cold operand");
STATISTIC(NumSelectConvertedHighPred,
          "Number of select groups converted due to high-predictability");
STATISTIC(NumSelectUnPred,
          "Number of select groups not converted due to unpredictability");
STATISTIC(NumSelectColdBB,
          "Number of select groups not converted due to cold basic block");
STATISTIC(NumSelectConvertedLoop,
          "Number of select groups converted due to loop-level analysis");
STATISTIC(NumSelectsConverted, "Number of selects converted");

static cl::opt<unsigned> ColdOperandThreshold(
    "cold-operand-threshold",
    cl::desc("Maximum frequency of path for an operand to be considered cold."),
    cl::init(20), cl::Hidden);

static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
    "cold-operand-max-cost-multiplier",
    cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
             "slice of a cold operand to be considered inexpensive."),
    cl::init(1), cl::Hidden);

static cl::opt<unsigned> GainGradientThreshold(
    "select-opti-loop-gradient-gain-threshold",
    cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden);

static cl::opt<unsigned> GainCycleThreshold(
    "select-opti-loop-cycle-gain-threshold",
    cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4),
    cl::Hidden);

static cl::opt<unsigned> GainRelativeThreshold(
    "select-opti-loop-relative-gain-threshold",
    cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
    cl::init(8), cl::Hidden);

static cl::opt<unsigned> MispredictDefaultRate(
    "mispredict-default-rate", cl::Hidden, cl::init(25),
    cl::desc("Default mispredict rate (initialized to 25%)."));

static cl::opt<bool> DisableLoopLevelHeuristics(
    "disable-loop-level-heuristics", cl::Hidden, cl::init(false),
    cl::desc("Disable loop-level heuristics."));
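// Note (editorial, not part of the upstream file): the knobs above are
// ordinary cl::opt flags, so they can be set on any tool that runs the codegen
// pipeline, e.g. a hypothetical invocation
//   llc -O2 -cold-operand-threshold=10 -mispredict-default-rate=20 foo.ll
// or via -mllvm when driving code generation through clang.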

namespace {

class SelectOptimizeImpl {
  const LoopInfo *LI = nullptr;

public:
  SelectOptimizeImpl() = default;
  SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
  bool runOnFunction(Function &F, Pass &P);

  using Scaled64 = ScaledNumber<uint64_t>;

  struct CostInfo {
    /// Predicated cost (with selects as conditional moves).
    Scaled64 PredCost;
    /// Non-predicated cost (with selects converted to branches).
    Scaled64 NonPredCost;
  };

  /// SelectLike is an abstraction over SelectInst and other instructions that
  /// behave like a select on an i1 condition.
  class SelectLike {
    /// The select (or select-like binary operator) instruction.
    Instruction *I;
    /// Whether the select's condition is inverted.
    bool Inverted = false;
    /// For a select-like binary operator, the operand index of the (extended)
    /// condition.
    unsigned CondIdx;

  public:
    SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
        : I(I), Inverted(Inverted), CondIdx(CondIdx) {}

    Instruction *getI() { return I; }
    const Instruction *getI() const { return I; }

    Type *getType() const { return I->getType(); }

    unsigned getConditionOpIndex() { return CondIdx; };

    Value *getTrueValue(bool HonorInverts = true) const {
      if (Inverted && HonorInverts)
        return getFalseValue(/*HonorInverts=*/false);
      if (auto *Sel = dyn_cast<SelectInst>(I))
        return Sel->getTrueValue();
      // For a select-like binary operator there is no separate true value; it
      // is materialized later from the operator itself.
      if (isa<BinaryOperator>(I))
        return nullptr;

      llvm_unreachable("Unhandled case in getTrueValue");
    }

    Value *getFalseValue(bool HonorInverts = true) const {
      if (Inverted && HonorInverts)
        return getTrueValue(/*HonorInverts=*/false);
      if (auto *Sel = dyn_cast<SelectInst>(I))
        return Sel->getFalseValue();
      // For a select-like binary operator the false value is the operand that
      // is not the (extended) condition.
      if (auto *BO = dyn_cast<BinaryOperator>(I))
        return BO->getOperand(1 - CondIdx);

      llvm_unreachable("Unhandled case in getFalseValue");
    }

    /// Return the non-predicated cost of the true/false operand when the
    /// select is converted to a branch.
    Scaled64 getOpCostOnBranch(
        bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
        const TargetTransformInfo *TTI) {
      auto *V = IsTrue ? getTrueValue() : getFalseValue();
      if (V) {
        if (auto *IV = dyn_cast<Instruction>(V)) {
          auto It = InstCostMap.find(IV);
          return It != InstCostMap.end() ? It->second.NonPredCost
                                         : Scaled64::getZero();
        }
        return Scaled64::getZero();
      }

      // A null value means the operand is the select-like binary operator
      // itself: charge the operator plus its non-condition operand.
      InstructionCost Cost = TTI->getArithmeticInstrCost(
          getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency,
          {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
          {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
      Scaled64 TotalCost = Scaled64::get(*Cost.getValue());
      if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
        auto It = InstCostMap.find(OpI);
        if (It != InstCostMap.end())
          TotalCost += It->second.NonPredCost;
      }
      return TotalCost;
    }
  };
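  // Illustrative note (not in the upstream source): a "select-like"
  // instruction is a binary operator whose result equals a select on some i1
  // condition. For example, assuming %c is i1,
  //   %r = add i32 %x, (zext i1 %c to i32)
  // behaves like
  //   %r = select i1 %c, i32 (%x + 1), i32 %x
  // so getTrueValue() returns nullptr (the "+1" form is materialized later by
  // getTrueOrFalseValue) and getFalseValue() returns %x.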

private:
  /// A group of consecutive select-like instructions that depend on the same
  /// condition.
  struct SelectGroup {
    Value *Condition;
    SmallVector<SelectLike, 2> Selects;
  };
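  // Illustrative note (not in the upstream source): a select group is a run
  // of consecutive select-like instructions sharing one condition, e.g.
  //   %a = select i1 %c, i32 %x, i32 %y
  //   %b = select i1 %c, i32 %p, i32 %q
  // Both entries land in one SelectGroup with Condition == %c, so a single
  // branch can replace the whole group.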


  using SelectGroups = SmallVector<SelectGroup, 2>;

  // Converts select instructions of a function to conditional jumps when
  // deemed profitable. Returns true if at least one select was converted.
  bool optimizeSelects(Function &F);

  // Heuristics for determining which select instructions can be profitably
  // converted to branches. Separate heuristics for selects in inner-most loops
  // and for the rest of the code regions.
  void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
  void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);

  // Converts to branches the select groups that were deemed
  // profitable-to-convert.
  void convertProfitableSIGroups(SelectGroups &ProfSIGroups);

  // Splits selects of a given basic block into select groups.
  void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);

  // Determines for which select groups it is profitable converting to branches
  // (base and inner-most-loop heuristics).
  void findProfitableSIGroupsBase(SelectGroups &SIGroups,
                                  SelectGroups &ProfSIGroups);
  void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
                                        SelectGroups &ProfSIGroups);

  // Determines if a select group should be converted to a branch (base
  // heuristics).
  bool isConvertToBranchProfitableBase(const SelectGroup &ASI);

  // Returns true if there are expensive instructions in the cold value
  // operand's (if any) dependence slice of any of the selects of the group.
  bool hasExpensiveColdOperand(const SelectGroup &ASI);

  // For a given source instruction, collect its backwards dependence slice
  // consisting of instructions exclusively computed for producing the operands
  // of the source instruction.
  void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
                             Instruction *SI, bool ForSinking = false);

  // Returns true if the condition of the select is highly predictable.
  bool isSelectHighlyPredictable(const SelectLike SI);

  // Loop-level checks to determine if a non-predicated version (with branches)
  // of the loop is more profitable than its predicated version.
  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);

  // Computes instruction and loop-critical-path costs for both the predicated
  // and non-predicated version of the given loop.
  bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
                        DenseMap<const Instruction *, CostInfo> &InstCostMap,
                        CostInfo *LoopCost);

  // Returns a set of all the select instructions in the given select groups.
  SmallDenseMap<const Instruction *, SelectLike, 2>
  getSImap(const SelectGroups &SIGroups);

  // Returns a map from select-like instructions to their select group.
  SmallDenseMap<const Instruction *, const SelectGroup *, 2>
  getSGmap(const SelectGroups &SIGroups);

  // Returns the latency cost of a given instruction.
  std::optional<uint64_t> computeInstCost(const Instruction *I);

  // Returns the misprediction cost of a given select when converted to branch.
  Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);

  // Returns the cost of a branch when the prediction is correct.
  Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
                                const SelectLike SI);

  // Returns true if the target architecture supports lowering a given select.
  bool isSelectKindSupported(const SelectLike SI);
};


class SelectOptimize : public FunctionPass {
  SelectOptimizeImpl Impl;

public:
  static char ID;

  SelectOptimize() : FunctionPass(ID) {
    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    return Impl.runOnFunction(F, *this);
  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
  }
};

} // namespace


PreservedAnalyses SelectOptimizePass::run(Function &F,
                                          FunctionAnalysisManager &FAM) {
  SelectOptimizeImpl Impl(TM);
  return Impl.run(F, FAM);
}

char SelectOptimize::ID = 0;

INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                      false)

FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }

PreservedAnalyses SelectOptimizeImpl::run(Function &F,
                                          FunctionAnalysisManager &FAM) {
  TSI = TM->getSubtargetImpl(F);
  TLI = TSI->getTargetLowering();

  PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  assert(PSI && "This pass requires module analysis pass `profile-summary`!");

  TSchedModel.init(TSI);

  bool Changed = optimizeSelects(F);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
  TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  TSI = TM->getSubtargetImpl(F);
  TLI = TSI->getTargetLowering();

    return false;

    return false;

  TSchedModel.init(TSI);

    return false;

  return optimizeSelects(F);
}


bool SelectOptimizeImpl::optimizeSelects(Function &F) {
  SelectGroups ProfSIGroups;
  optimizeSelectsBase(F, ProfSIGroups);
  optimizeSelectsInnerLoops(F, ProfSIGroups);

  convertProfitableSIGroups(ProfSIGroups);

  return !ProfSIGroups.empty();
}


void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
                                             SelectGroups &ProfSIGroups) {
  // Collect all the select groups.
  SelectGroups SIGroups;
  for (BasicBlock &BB : F) {
    // Base heuristics apply only to non-inner-loop blocks.
    Loop *L = LI->getLoopFor(&BB);
    if (L && L->isInnermost())
      continue;
    collectSelectGroups(BB, SIGroups);
  }

  // Determine for which select groups it is profitable converting to branches.
  findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
}


void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
                                                   SelectGroups &ProfSIGroups) {
  SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
  // Need to check size on each iteration as we accumulate child loops.
  for (unsigned long i = 0; i < Loops.size(); ++i)
    for (Loop *ChildL : Loops[i]->getSubLoops())
      Loops.push_back(ChildL);

  for (Loop *L : Loops) {
    if (!L->isInnermost())
      continue;

    SelectGroups SIGroups;
    for (BasicBlock *BB : L->getBlocks())
      collectSelectGroups(*BB, SIGroups);

    findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
  }
}


// If isTrue is true, return the true value of SI, otherwise return the false
// value of SI. For a select-like binary operator the requested value may not
// exist as a separate instruction, in which case it is materialized by
// cloning the operator and replacing its condition operand with a constant.
static Value *getTrueOrFalseValue(
    SelectOptimizeImpl::SelectLike &SI, bool isTrue,
    SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects,
    BasicBlock *B) {
  Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
  if (V) {
    auto *IV = dyn_cast<Instruction>(V);
    if (IV && OptSelects.count(IV))
      return isTrue ? OptSelects[IV].first : OptSelects[IV].second;
    return V;
  }

  auto *BO = cast<BinaryOperator>(SI.getI());
  assert((BO->getOpcode() == Instruction::Add ||
          BO->getOpcode() == Instruction::Or ||
          BO->getOpcode() == Instruction::Sub) &&
         "Only currently handling Add, Or and Sub binary operators.");

  auto *CBO = BO->clone();
  auto CondIdx = SI.getConditionOpIndex();
  auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
  if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
    CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
  } else {
    assert((isa<SExtInst>(AuxI) || isa<AShrOperator>(AuxI)) &&
           "Unexpected opcode");
    CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));
  }

  unsigned OtherIdx = 1 - CondIdx;
  if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
    if (OptSelects.count(IV))
      CBO->setOperand(OtherIdx,
                      isTrue ? OptSelects[IV].first : OptSelects[IV].second);
  }
  CBO->insertBefore(B->getTerminator());
  return CBO;
}
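// Illustrative sketch (not in the upstream source): for a select-like
//   %r = or i32 %x, (zext i1 %c to i32)
// getTrueOrFalseValue(SI, /*isTrue=*/true, ...) clones the 'or', replaces the
// zext operand with the constant 1 and inserts the clone in the true block,
// while the false value is simply %x, so no instruction is needed on the
// false path.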


void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
  for (SelectGroup &ASI : ProfSIGroups) {
    // The code transformation: the group's selects are replaced by PHIs in a
    // new merge block, guarded by a conditional branch on the (frozen) group
    // condition; the exclusive dependence slices of the true/false operands
    // are sunk into new true/false blocks so they only execute on their path.

    // Collect the dependence slices that can be sunk, interleaving the
    // per-select slices to expose more instruction-level parallelism.
    SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
    typedef std::stack<Instruction *>::size_type StackSizeType;
    StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
    for (SelectLike &SI : ASI.Selects) {
      if (!isa<SelectInst>(SI.getI()))
        continue;

      if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) {
        std::stack<Instruction *> TrueSlice;
        getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
        maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
        TrueSlices.push_back(TrueSlice);
      }
      if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) {
        if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) {
          std::stack<Instruction *> FalseSlice;
          getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
          maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
          FalseSlices.push_back(FalseSlice);
        }
      }
    }

    SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
    for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
      for (auto &S : TrueSlices) {
        if (!S.empty()) {
          TrueSlicesInterleaved.push_back(S.top());
          S.pop();
        }
      }
    }
    for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
      for (auto &S : FalseSlices) {
        if (!S.empty()) {
          FalseSlicesInterleaved.push_back(S.top());
          S.pop();
        }
      }
    }

    // Split the block containing the group right after its last select.
    SelectLike &SI = ASI.Selects.front();
    SelectLike &LastSI = ASI.Selects.back();
    BasicBlock *StartBlock = SI.getI()->getParent();
    auto SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
    SplitPt.setHeadBit(true);
    BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));

    // Delete the unconditional branch that was just created by the split.
    StartBlock->getTerminator()->eraseFromParent();

    // Move any instructions that sit between the selects of the group to the
    // newly-created end block.
    SmallVector<Instruction *, 2> SinkInstrs;
    auto DIt = SI.getI()->getIterator();
    auto NIt = ASI.Selects.begin();
    while (&*DIt != LastSI.getI()) {
      if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
        ++NIt;
      else
        SinkInstrs.push_back(&*DIt);
      DIt++;
    }
    for (auto *DI : SinkInstrs)
      DI->moveBefore(&*EndBlock->getFirstInsertionPt());

    // Transfer any debug records attached to the moved instructions.
    auto TransferDbgRecords = [&](Instruction &I) {
      }
    };

    auto R = make_range(std::next(SI.getI()->getIterator()),
                        std::next(LastSI.getI()->getIterator()));
    llvm::for_each(R, TransferDbgRecords);

    // These are the new basic blocks for the conditional branch. At least one
    // will become a real block; a side with nothing to sink stays null and
    // falls through from the start block.
    BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
    BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;

    auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
      for (auto &SL : SG.Selects) {
        if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
          return true;
      }
      return false;
    };
    if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
      TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
                                     EndBlock->getParent(), EndBlock);
      TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
      TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
      for (Instruction *TrueInst : TrueSlicesInterleaved)
        TrueInst->moveBefore(TrueBranch);
    }
    if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
      FalseBlock =
          BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
                             EndBlock->getParent(), EndBlock);
      FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
      FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
      for (Instruction *FalseInst : FalseSlicesInterleaved)
        FalseInst->moveBefore(FalseBranch);
    }

    // If there was nothing to sink on either side, create a dummy false block
    // so the PHIs below have two distinct incoming blocks.
    if (TrueBlock == FalseBlock) {
      assert(TrueBlock == nullptr &&
             "Unexpected basic block transform while optimizing select");

      FalseBlock = BasicBlock::Create(EndBlock->getContext(), "select.false",
                                      EndBlock->getParent(), EndBlock);
      FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
      FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
    }

    // Insert the real conditional branch based on the original condition. If
    // one side has no block of its own, it branches straight to the end block
    // and uses the start block as the PHI's incoming block for that side.
    BasicBlock *TT, *FT;
    if (TrueBlock == nullptr) {
      TT = EndBlock;
      FT = FalseBlock;
      TrueBlock = StartBlock;
    } else if (FalseBlock == nullptr) {
      TT = TrueBlock;
      FT = EndBlock;
      FalseBlock = StartBlock;
    } else {
      TT = TrueBlock;
      FT = FalseBlock;
    }
    IRBuilder<> IB(SI.getI());
    auto *CondFr =
        IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");

    SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> INS;

    // Use the true/false values of each select (materializing them for
    // select-like operators) as the incoming values of a new PHI.
    for (SelectLike &SI : ASI.Selects) {
      // The select itself is replaced with a PHI node in the end block.
      PHINode *PN = PHINode::Create(SI.getType(), 2, "");
      PN->insertBefore(EndBlock->begin());
      PN->takeName(SI.getI());

      // If any other group uses this select as its condition, it must now use
      // the PHI instead.
      if (SI.getI()->getType()->isIntegerTy(1)) {
        for (auto &SG : ProfSIGroups) {
          if (SG.Condition == SI.getI())
            SG.Condition = PN;
        }
      }
      SI.getI()->replaceAllUsesWith(PN);
      Value *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock);
      Value *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock);
      INS[PN] = {TV, FV};
      PN->addIncoming(TV, TrueBlock);
      PN->addIncoming(FV, FalseBlock);
      PN->setDebugLoc(SI.getI()->getDebugLoc());
      ++NumSelectsConverted;
    }
    IB.CreateCondBr(CondFr, TT, FT, SI.getI());

    // Delete the now-dead select(-like) instructions.
    for (SelectLike &SI : ASI.Selects)
      SI.getI()->eraseFromParent();
  }
}
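// Illustrative before/after sketch of the transformation above (not part of
// the upstream file). A profitable group such as
//   %v = select i1 %c, i32 %heavy, i32 %x
// in block %start becomes roughly:
//   start:
//     %c.frozen = freeze i1 %c
//     br i1 %c.frozen, label %select.true.sink, label %select.end
//   select.true.sink:              ; dependence slice of %heavy sunk here
//     ...
//     br label %select.end
//   select.end:
//     %v = phi i32 [ %heavy, %select.true.sink ], [ %x, %start ]
// Block names are illustrative; they depend on how the blocks are created.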


void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
                                             SelectGroups &SIGroups) {
  // Represents something that can be considered as a select instruction.
  // Auxiliary instructions are ones that depend on a condition and produce a
  // constant-like value on the true/false paths, such as:
  // * ZExt/SExt of an i1
  // * Not of an i1
  // * Shift by bitwidth-1 (an all-ones/all-zeros sign mask)
  struct SelectLikeInfo {
    Value *Cond;
    bool IsAuxiliary;
    bool IsInverted;
    unsigned ConditionIdx;
  };

  DenseMap<Value *, SelectLikeInfo> SelectInfo;
  // Compares seen so far in this block, used to recognize sign-mask shifts as
  // select conditions.
  SmallPtrSet<CmpInst *, 2> SeenCmp;

  // Check whether the instruction is select-like (or could become part of a
  // select-like expression); record it in SelectInfo and return an iterator to
  // the inserted entry, or SelectInfo.end() otherwise.
  auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
    if (auto *Cmp = dyn_cast<CmpInst>(I)) {
      SeenCmp.insert(Cmp);
      return SelectInfo.end();
    }

    Value *Cond;
    if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) &&
        Cond->getType()->isIntegerTy(1)) {
      bool Inverted = match(Cond, m_Not(m_Value(Cond)));
      return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
    }

    // Not of an i1 value.
    if (match(I, m_Not(m_Value(Cond)))) {
      return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
    }

    // Select instructions are what we are usually looking for.
    if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) {
      bool Inverted = match(Cond, m_Not(m_Value(Cond)));
      return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
    }

    // A shift of a value by bitwidth-1 produces an all-ones/all-zeros mask; if
    // a matching signedness compare of the same value has been seen, treat the
    // shift as an auxiliary select-like on that compare.
    Value *Val;
    ConstantInt *Shift;
    if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
        I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
      for (auto *CmpI : SeenCmp) {
        auto Pred = CmpI->getPredicate();
        if (Val != CmpI->getOperand(0))
          continue;
        if ((Pred == CmpInst::ICMP_SGT &&
             match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
            (Pred == CmpInst::ICMP_SGE &&
             match(CmpI->getOperand(1), m_ConstantInt<0>())) ||
            (Pred == CmpInst::ICMP_SLT &&
             match(CmpI->getOperand(1), m_ConstantInt<0>())) ||
            (Pred == CmpInst::ICMP_SLE &&
             match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
          bool Inverted =
              Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
          return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
        }
      }
      return SelectInfo.end();
    }

    // An Add/Sub/Or that consumes one of the auxiliary patterns above is
    // itself select-like.
    Value *X;
    auto MatchZExtOrSExtPattern =
        m_c_BinOp(m_Value(), m_OneUse(m_ZExtOrSExt(m_Value(X))));
    auto MatchShiftPattern =
        m_c_BinOp(m_Value(), m_OneUse(m_Shr(m_Value(X), m_ConstantInt(Shift))));

    if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
        (match(I, MatchShiftPattern) &&
         X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
      if (I->getOpcode() != Instruction::Add &&
          I->getOpcode() != Instruction::Sub &&
          I->getOpcode() != Instruction::Or)
        return SelectInfo.end();

      if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
        return SelectInfo.end();

      // Iterate through the operands and find the one that is a recognised
      // auxiliary select-like instruction. The operand index does not matter
      // for Add and Or; for Sub only the second operand can be transformed.
      unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
      for (; Idx < 2; Idx++) {
        auto *Op = I->getOperand(Idx);
        auto It = SelectInfo.find(Op);
        if (It != SelectInfo.end() && It->second.IsAuxiliary) {
          Cond = It->second.Cond;
          bool Inverted = It->second.IsInverted;
          return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
        }
      }
    }
    return SelectInfo.end();
  };

  bool AlreadyProcessed = false;
  BasicBlock::iterator BBIt = BB.begin();
  DenseMap<Value *, SelectLikeInfo>::iterator It;
  while (BBIt != BB.end()) {
    Instruction *I = &*BBIt++;
    if (I->isDebugOrPseudoInst())
      continue;

    if (!AlreadyProcessed)
      It = ProcessSelectInfo(I);
    else
      AlreadyProcessed = false;

    if (It == SelectInfo.end() || It->second.IsAuxiliary)
      continue;

    if (!TTI->shouldTreatInstructionLikeSelect(I))
      continue;

    Value *Cond = It->second.Cond;
    // Vector conditions are not supported.
    if (!Cond->getType()->isIntegerTy(1))
      continue;

    SelectGroup SIGroup = {Cond, {}};
    SIGroup.Selects.emplace_back(I, It->second.IsInverted,
                                 It->second.ConditionIdx);

    // If the select type is not supported, no point optimizing it; instruction
    // selection will take care of it.
    if (!isSelectKindSupported(SIGroup.Selects.front()))
      continue;

    while (BBIt != BB.end()) {
      Instruction *NI = &*BBIt;
      // Debug/pseudo instructions should not prevent forming a group.
      if (NI->isDebugOrPseudoInst()) {
        ++BBIt;
        continue;
      }

      It = ProcessSelectInfo(NI);
      if (It == SelectInfo.end()) {
        AlreadyProcessed = true;
        break;
      }

      // Only accept select-likes (and auxiliaries) with the same condition.
      auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
      if (Cond != CurrCond) {
        AlreadyProcessed = true;
        break;
      }

      if (!IsAux)
        SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
      ++BBIt;
    }
    LLVM_DEBUG({
      dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
      for (auto &SI : SIGroup.Selects)
        dbgs() << "  " << *SI.getI() << "\n";
    });

    SIGroups.push_back(SIGroup);
  }
}
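// Illustrative note (not in the upstream source): besides plain selects, the
// grouping above also recognizes sign-mask idioms. For instance, with a
// previously seen compare %c = icmp slt i32 %x, 0, the instruction
//   %m = ashr i32 %x, 31
// produces all-ones exactly when %c is true, so an add/sub/or that consumes
// %m (or a zext/sext of an i1) is recorded as select-like on condition %c.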


void SelectOptimizeImpl::findProfitableSIGroupsBase(
    SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
  for (SelectGroup &ASI : SIGroups) {
    ++NumSelectOptAnalyzed;
    if (isConvertToBranchProfitableBase(ASI))
      ProfSIGroups.push_back(ASI);
  }
}

static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
                               DiagnosticInfoOptimizationBase &Rem) {
  LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
  ORE->emit(Rem);
}

void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
    const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
  NumSelectOptAnalyzed += SIGroups.size();
  // For each select group in an inner-most loop, a branch is preferable over a
  // select/conditional-move if:
  // i) conversion to branches for all the select groups of the loop satisfies
  //    loop-level heuristics, including reducing the loop's critical path by
  //    some threshold (see checkLoopHeuristics); and
  // ii) the select group itself is cheaper with a branch compared to its
  //     predicated version.
  DenseMap<const Instruction *, CostInfo> InstCostMap;
  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
                          {Scaled64::getZero(), Scaled64::getZero()}};
  if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
      !checkLoopHeuristics(L, LoopCost)) {
    return;
  }

  for (SelectGroup &ASI : SIGroups) {
    // Assuming infinite resources, the cost of a group of instructions is the
    // cost of the most expensive instruction of the group.
    Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
    for (SelectLike &SI : ASI.Selects) {
      SelectCost = std::max(SelectCost, InstCostMap[SI.getI()].PredCost);
      BranchCost = std::max(BranchCost, InstCostMap[SI.getI()].NonPredCost);
    }
    if (BranchCost < SelectCost) {
      OptimizationRemark OR(DEBUG_TYPE, "SelectOpti",
                            ASI.Selects.front().getI());
      OR << "Profitable to convert to branch (loop analysis). BranchCost="
         << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
         << ". ";
      EmitAndPrintRemark(ORE, OR);
      ++NumSelectConvertedLoop;
      ProfSIGroups.push_back(ASI);
    } else {
      OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
                                      ASI.Selects.front().getI());
      ORmiss << "Select is more profitable (loop analysis). BranchCost="
             << BranchCost.toString()
             << ", SelectCost=" << SelectCost.toString() << ". ";
      EmitAndPrintRemark(ORE, ORmiss);
    }
  }
}


bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
    const SelectGroup &ASI) {
  const SelectLike &SI = ASI.Selects.front();
  LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
                    << "\n");
  OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());

  // Skip cold basic blocks. Better to optimize for size for cold blocks.
  if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
    ++NumSelectColdBB;
    ORmiss << "Not converted to branch because of cold basic block. ";
    EmitAndPrintRemark(ORE, ORmiss);
    return false;
  }

  // If unpredictable, branch form is less profitable.
  if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
    ++NumSelectUnPred;
    ORmiss << "Not converted to branch because of unpredictable branch. ";
    EmitAndPrintRemark(ORE, ORmiss);
    return false;
  }

  // If highly predictable, branch form is more profitable, unless a
  // predictable select is inexpensive in the target architecture.
  if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
    ++NumSelectConvertedHighPred;
    OR << "Converted to branch because of highly predictable branch. ";
    EmitAndPrintRemark(ORE, OR);
    return true;
  }

  // Look for expensive instructions in the cold operand's (if any) dependence
  // slice of any of the selects in the group.
  if (hasExpensiveColdOperand(ASI)) {
    ++NumSelectConvertedExpColdOperand;
    OR << "Converted to branch because of expensive cold operand.";
    EmitAndPrintRemark(ORE, OR);
    return true;
  }

  // If the select group occupies the latch of an inner-most loop and is large
  // enough, prefer the branch form.
  auto *BB = SI.getI()->getParent();
  auto *L = LI->getLoopFor(BB);
  if (L && L->isInnermost() && L->getLoopLatch() == BB &&
      ASI.Selects.size() >= 3) {
    OR << "Converted to branch because select group in the latch block is big.";
    EmitAndPrintRemark(ORE, OR);
    return true;
  }

  ORmiss << "Not profitable to convert to branch (base heuristic).";
  EmitAndPrintRemark(ORE, ORmiss);
  return false;
}


static InstructionCost divideNearest(const InstructionCost &Numerator,
                                     uint64_t Denominator) {
  return (Numerator + (Denominator / 2)) / Denominator;
}

static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI,
                                 uint64_t &TrueVal, uint64_t &FalseVal) {
  if (isa<SelectInst>(SI.getI()))
    return extractBranchWeights(*SI.getI(), TrueVal, FalseVal);
  return false;
}


bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
  bool ColdOperand = false;
  uint64_t TrueWeight, FalseWeight, TotalWeight;
  if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) {
    uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
    TotalWeight = TrueWeight + FalseWeight;
    // Is any of the weights a small fraction of the total weight?
    ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
  } else if (PSI->hasProfileSummary()) {
    OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
                                    ASI.Selects.front().getI());
    ORmiss << "Profile data available but missing branch-weights metadata for "
              "select instruction. ";
    EmitAndPrintRemark(ORE, ORmiss);
  }
  if (!ColdOperand)
    return false;

  // Check if the cold path's dependence slice is expensive for any of the
  // selects of the group.
  for (SelectLike SI : ASI.Selects) {
    Instruction *ColdI = nullptr;
    uint64_t HotWeight;
    if (TrueWeight < FalseWeight) {
      ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue());
      HotWeight = FalseWeight;
    } else {
      ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue());
      HotWeight = TrueWeight;
    }
    if (ColdI) {
      std::stack<Instruction *> ColdSlice;
      getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
      InstructionCost SliceCost = 0;
      while (!ColdSlice.empty()) {
        SliceCost += TTI->getInstructionCost(ColdSlice.top(),
                                             TargetTransformInfo::TCK_Latency);
        ColdSlice.pop();
      }
      // The colder the cold value operand of the select is, the more expensive
      // its dependence slice may be before conversion stops paying off, so
      // scale the slice cost by the hot path's relative frequency.
      InstructionCost AdjSliceCost =
          divideNearest(SliceCost * HotWeight, TotalWeight);
      if (AdjSliceCost >=
          ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
        return true;
    }
  }
  return false;
}
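// Worked example for the heuristic above (illustrative, assuming the default
// flag values): with branch weights 90:10 the cold path runs ~10% of the time,
// below cold-operand-threshold (20%). If the cold operand's one-use backward
// slice costs 8 latency units, the frequency-adjusted cost is about
// 8 * 90 / 100 = 7, which is >= 1 * TCC_Expensive, so the group is converted
// to a branch to keep the slice off the hot path.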


// Check if it is safe to move LoadI next to the SI instruction: both must be
// in the same basic block with no intervening instruction that may write to
// memory.
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
  if (LoadI->getParent() != SI->getParent())
    return false;
  auto It = LoadI->getIterator();
  while (&*It != SI) {
    if (It->mayWriteToMemory())
      return false;
    It++;
  }
  return true;
}


// Returns the backwards dependence slice of an instruction: the set of
// single-use instructions that the given instruction (transitively) depends
// on, limited to instructions that are at least as frequently executed and,
// when sinking, safe to move below the select.
void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
                                               std::stack<Instruction *> &Slice,
                                               Instruction *SI,
                                               bool ForSinking) {
  SmallPtrSet<Instruction *, 2> Visited;
  std::queue<Instruction *> Worklist;
  Worklist.push(I);
  while (!Worklist.empty()) {
    Instruction *II = Worklist.front();
    Worklist.pop();

    // Avoid cycles.
    if (!Visited.insert(II).second)
      continue;

    if (!II->hasOneUse())
      continue;

    // Cannot soundly sink instructions with side-effects, terminators, PHIs,
    // or other selects (handled separately).
    if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
                       isa<SelectInst>(II) || isa<PHINode>(II)))
      continue;

    // Avoid sinking loads past state-modifying instructions that may alias
    // with the loaded address; only allow loads conservatively proven safe.
    if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
      continue;

    // Avoid considering instructions with less frequency than the source
    // instruction (i.e., colder code regions of the dependence slice).
    if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
      continue;

    // Eligible one-use instruction added to the slice.
    Slice.push(II);

    // Explore the operands of the current instruction to expand the slice.
    for (Value *Op : II->operand_values())
      if (auto *OpI = dyn_cast<Instruction>(Op))
        Worklist.push(OpI);
  }
}
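// Illustrative note (not in the upstream source): for
//   %t0 = load i32, ptr %p        ; single use
//   %t1 = mul i32 %t0, 7          ; single use
//   %v  = select i1 %c, i32 %t1, i32 0
// the exclusive backward slice of %t1 is {%t0, %t1}: both are used only on
// this path, are at least as frequently executed as the select, and (assuming
// no intervening stores when sinking) may be moved below the branch. An
// operand with extra uses would be excluded, since it has to be computed
// regardless of the branch outcome.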


bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
  uint64_t TrueWeight, FalseWeight;
  if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
    uint64_t Max = std::max(TrueWeight, FalseWeight);
    uint64_t Sum = TrueWeight + FalseWeight;
    if (Sum != 0) {
      auto Probability = BranchProbability::getBranchProbability(Max, Sum);
      if (Probability > TTI->getPredictableBranchThreshold())
        return true;
    }
  }
  return false;
}
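// Worked example (illustrative): with !prof branch weights of 1000:1 the
// dominant arm has probability 1000/1001 (~99.9%), which exceeds the target's
// predictable-branch threshold (commonly 99%), so the select counts as highly
// predictable and its misprediction cost is treated as zero.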


bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
                                             const CostInfo LoopCost[2]) {
  // Loop-level checks to determine if a non-predicated version (with branches)
  // of the loop is more profitable than its predicated version.

  if (DisableLoopLevelHeuristics)
    return true;

  OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
                                   L->getHeader()->getFirstNonPHI());

  if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
      LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
    ORmissL << "No select conversion in the loop due to no reduction of loop's "
               "critical path. ";
    EmitAndPrintRemark(ORE, ORmissL);
    return false;
  }

  Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
                      LoopCost[1].PredCost - LoopCost[1].NonPredCost};

  // Profitably converting to branches needs to reduce the loop's critical path
  // by at least some absolute number of cycles and by a minimum fraction of
  // the predicated cost of the second iteration.
  if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
      Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
    Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
    ORmissL << "No select conversion in the loop due to small reduction of "
               "loop's critical path. Gain="
            << Gain[1].toString()
            << ", RelativeGain=" << RelativeGain.toString() << "%. ";
    EmitAndPrintRemark(ORE, ORmissL);
    return false;
  }

  // If the gain grows between iterations, the growth rate (gradient) must also
  // clear its threshold, otherwise the benefit vanishes for longer-running
  // loops.
  if (Gain[1] > Gain[0]) {
    Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
                            (LoopCost[1].PredCost - LoopCost[0].PredCost);
    if (GradientGain < Scaled64::get(GainGradientThreshold)) {
      ORmissL << "No select conversion in the loop due to small gradient gain. "
                 "GradientGain="
              << GradientGain.toString() << "%. ";
      EmitAndPrintRemark(ORE, ORmissL);
      return false;
    }
  }
  // If the gain decreases it is not profitable to convert.
  else if (Gain[1] < Gain[0]) {
    ORmissL
        << "No select conversion in the loop due to negative gradient gain. ";
    EmitAndPrintRemark(ORE, ORmissL);
    return false;
  }

  // The non-predicated version of the loop is more profitable than its
  // predicated version.
  return true;
}
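// Worked example (illustrative, default thresholds): if one loop iteration's
// predicated critical path is 40 cycles and the branch form shortens it to 34,
// the second-iteration gain is 6 cycles. That clears the 4-cycle minimum
// (select-opti-loop-cycle-gain-threshold) and the relative bar of 40/8 = 5
// cycles (select-opti-loop-relative-gain-threshold, i.e. 12.5%), so, gradient
// checks permitting, the loop's select groups become conversion candidates.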


// Computes instruction and loop-critical-path costs for both the predicated
// and non-predicated version of the given loop, walking two loop iterations to
// capture loop-carried dependences.
bool SelectOptimizeImpl::computeLoopCosts(
    const Loop *L, const SelectGroups &SIGroups,
    DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
  LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
                    << L->getHeader()->getName() << "\n");
  const auto SImap = getSImap(SIGroups);
  const auto SGmap = getSGmap(SIGroups);

  const unsigned Iterations = 2;
  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
    // Cost of the loop's critical path.
    CostInfo &MaxCost = LoopCost[Iter];
    for (BasicBlock *BB : L->getBlocks()) {
      for (const Instruction &I : *BB) {
        if (I.isDebugOrPseudoInst())
          continue;

        Scaled64 IPredCost = Scaled64::getZero(),
                 INonPredCost = Scaled64::getZero();

        // Assume infinite resources that fully exploit the available
        // instruction-level parallelism: an instruction's path cost starts at
        // the maximum over its operands' path costs.
        for (const Use &U : I.operands()) {
          auto UI = dyn_cast<Instruction>(U.get());
          if (!UI)
            continue;
          if (InstCostMap.count(UI)) {
            IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
            INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
          }
        }
        auto ILatency = computeInstCost(&I);
        if (!ILatency) {
          OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
          ORmissL << "Invalid instruction cost preventing analysis and "
                     "optimization of the inner-most loop containing this "
                     "instruction. ";
          EmitAndPrintRemark(ORE, ORmissL);
          return false;
        }
        IPredCost += Scaled64::get(*ILatency);
        INonPredCost += Scaled64::get(*ILatency);

        // For a select(-like) instruction, the non-predicated cost models the
        // branch form: the weighted cost of the correctly-predicted path plus
        // the expected misprediction penalty.
        if (SImap.contains(&I)) {
          auto SI = SImap.at(&I);
          const auto *SG = SGmap.at(&I);
          Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
          Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
          Scaled64 PredictedPathCost =
              getPredictedPathCost(TrueOpCost, FalseOpCost, SI);

          Scaled64 CondCost = Scaled64::getZero();
          if (auto *CI = dyn_cast<Instruction>(SG->Condition))
            if (InstCostMap.count(CI))
              CondCost = InstCostMap[CI].NonPredCost;
          Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);

          INonPredCost = PredictedPathCost + MispredictCost;
        }
        LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
                          << INonPredCost << " for " << I << "\n");

        InstCostMap[&I] = {IPredCost, INonPredCost};
        MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
        MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
      }
    }
    LLVM_DEBUG(dbgs() << "Iteration " << Iter
                      << " MaxCost = " << MaxCost.PredCost << " "
                      << MaxCost.NonPredCost << "\n");
  }
  return true;
}


SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2>
SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
  SmallDenseMap<const Instruction *, SelectLike, 2> SImap;
  for (const SelectGroup &ASI : SIGroups)
    for (const SelectLike &SI : ASI.Selects)
      SImap.try_emplace(SI.getI(), SI);
  return SImap;
}

SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2>
SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
  SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap;
  for (const SelectGroup &ASI : SIGroups)
    for (const SelectLike &SI : ASI.Selects)
      SImap.try_emplace(SI.getI(), &ASI);
  return SImap;
}


std::optional<uint64_t>
SelectOptimizeImpl::computeInstCost(const Instruction *I) {
  InstructionCost ICost =
      TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
  if (auto OC = ICost.getValue())
    return std::optional<uint64_t>(*OC);
  return std::nullopt;
}


SelectOptimizeImpl::Scaled64
SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
                                         const Scaled64 CondCost) {
  uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;

  // Account for the default misprediction rate when using a branch
  // (conservatively set to 25% by default).
  uint64_t MispredictRate = MispredictDefaultRate;
  // If the select condition is obviously predictable, then the misprediction
  // rate is zero.
  if (isSelectHighlyPredictable(SI))
    MispredictRate = 0;

  // CondCost is included to account for cases where the condition is part of
  // a long dependence chain: the misprediction cost is at least the cost of
  // resolving the condition.
  Scaled64 MispredictCost =
      std::max(Scaled64::get(MispredictPenalty), CondCost) *
      Scaled64::get(MispredictRate);
  MispredictCost /= Scaled64::get(100);

  return MispredictCost;
}
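// Worked example (illustrative): with a scheduler-model misprediction penalty
// of 14 cycles, a condition cost below 14, and the default 25% rate, the added
// cost is max(14, CondCost) * 25 / 100 = 3.5 cycles per select in branch form;
// for a highly predictable select the rate, and thus this term, is 0.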


// Returns the cost of a branch when the prediction is correct:
// TrueCost * TrueProbability + FalseCost * FalseProbability.
SelectOptimizeImpl::Scaled64
SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
                                         const SelectLike SI) {
  Scaled64 PredPathCost;
  uint64_t TrueWeight, FalseWeight;
  if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
    uint64_t SumWeight = TrueWeight + FalseWeight;
    if (SumWeight != 0) {
      PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
                     FalseCost * Scaled64::get(FalseWeight);
      PredPathCost /= Scaled64::get(SumWeight);
      return PredPathCost;
    }
  }
  // Without branch weight metadata, assume a 75%/25% split and pick the
  // assignment that yields the larger (more conservative) cost.
  PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
                          FalseCost * Scaled64::get(3) + TrueCost);
  PredPathCost /= Scaled64::get(4);
  return PredPathCost;
}
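// Worked example (illustrative): with branch weights 3:1, TrueCost=8 and
// FalseCost=2, the predicted path cost is (8*3 + 2*1) / 4 = 6.5. Without
// weights, the conservative 75/25 split is used, i.e.
// max(3*TrueCost + FalseCost, 3*FalseCost + TrueCost) / 4 = 6.5 here as well.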


bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
  TargetLowering::SelectSupportKind SelectKind;
  if (SI.getType()->isVectorTy())
    SelectKind = TargetLowering::ScalarCondVectorVal;
  else
    SelectKind = TargetLowering::ScalarValSelect;
  return TLI->isSelectSupported(SelectKind);
}
