LLVM: lib/CodeGen/SelectOptimize.cpp Source File

//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass converts selects to conditional jumps when profitable.
//
//===----------------------------------------------------------------------===//


#include <optional>
#include <queue>
#include <stack>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "select-optimize"


50 "Number of select groups considered for conversion to branch");

51STATISTIC(NumSelectConvertedExpColdOperand,

52 "Number of select groups converted due to expensive cold operand");

54 "Number of select groups converted due to high-predictability");

56 "Number of select groups not converted due to unpredictability");

58 "Number of select groups not converted due to cold basic block");

60 "Number of select groups converted due to loop-level analysis");

61STATISTIC(NumSelectsConverted, "Number of selects converted");

62

64 "cold-operand-threshold",

65 cl::desc("Maximum frequency of path for an operand to be considered cold."),

67

69 "cold-operand-max-cost-multiplier",

70 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "

71 "slice of a cold operand to be considered inexpensive."),

73

76 cl::desc("Gradient gain threshold (%)."),

78

81 cl::desc("Minimum gain per loop (in cycles) threshold."),

83

85 "select-opti-loop-relative-gain-threshold",

87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),

89

92 cl::desc("Default mispredict rate (initialized to 25%)."));

93

97 cl::desc("Disable loop-level heuristics."));

98

namespace {

class SelectOptimizeImpl {
  const TargetMachine *TM = nullptr;
  const TargetSubtargetInfo *TSI = nullptr;
  const TargetLowering *TLI = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  const LoopInfo *LI = nullptr;
  BlockFrequencyInfo *BFI;
  ProfileSummaryInfo *PSI = nullptr;
  OptimizationRemarkEmitter *ORE = nullptr;
  TargetSchedModel TSchedModel;

public:
  SelectOptimizeImpl() = default;
  SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
  bool runOnFunction(Function &F, Pass &P);

  using Scaled64 = ScaledNumber<uint64_t>;

  struct CostInfo {
    /// Predicated cost (with selects as conditional moves).
    Scaled64 PredCost;
    /// Non-Predicated cost (with selects converted to branches).
    Scaled64 NonPredCost;
  };


  /// SelectLike is an abstraction over SelectInst and other instructions that
  /// can be treated like a select: for example `or(zext(c), x)` behaves like
  /// `select(c, x|1, x)`, and `add`/`sub`/`or` instructions whose condition
  /// operand comes from a zext/sext of an i1 can be handled the same way.

  class SelectLike {
    /// The select (/or) instruction.
    Instruction *I;
    /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
    /// opposed to the original condition.
    bool Inverted = false;

    /// The index of the operand that depends on the condition. Only valid for
    /// select-like binary operators such as 'or'.
    unsigned CondIdx;

  public:
    SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
        : I(I), Inverted(Inverted), CondIdx(CondIdx) {}

    Instruction *getI() { return I; }
    const Instruction *getI() const { return I; }

    Type *getType() const { return I->getType(); }

    unsigned getConditionOpIndex() { return CondIdx; };

    /// Return the true value of the SelectLike instruction. Note that this
    /// value may not exist for all SelectLike instructions: for
    /// `or(zext(c), x)` the true value would be `or(x, 1)`, which is not
    /// materialized as a separate instruction.
    Value *getTrueValue(bool HonorInverts = true) const {
      if (Inverted && HonorInverts)
        return getFalseValue(false);
      if (auto *Sel = dyn_cast<SelectInst>(I))
        return Sel->getTrueValue();
      // Or(zext) case - the true value is the binop itself, so return nullptr
      // as the value is not materialized separately.
      return nullptr;
    }

    /// Return the false value of the SelectLike instruction, e.g. `x` in
    /// `or(zext(c), x)`: when the condition is zero the binop produces its
    /// non-condition operand unchanged.
    Value *getFalseValue(bool HonorInverts = true) const {
      if (Inverted && HonorInverts)
        return getTrueValue(false);
      if (auto *Sel = dyn_cast<SelectInst>(I))
        return Sel->getFalseValue();
      // We are on the branch where the condition is zero, which means BinOp
      // does not perform any computation, and we can simply return the operand
      // that is not related to the condition.
      if (auto *BO = dyn_cast<BinaryOperator>(I))
        return BO->getOperand(1 - CondIdx);

      llvm_unreachable("Unhandled case in getFalseValue");
    }


    /// Return the non-predicated cost of the true/false operand on this
    /// branch, given the costs already computed in InstCostMap. For
    /// select-like binary operators the operand value has to be generated, so
    /// its cost is estimated from the binop itself.
    Scaled64 getOpCostOnBranch(
        bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
        const TargetTransformInfo *TTI) {
      auto *V = IsTrue ? getTrueValue() : getFalseValue();
      if (V) {
        if (auto *IV = dyn_cast<Instruction>(V)) {
          auto It = InstCostMap.find(IV);
          return It != InstCostMap.end() ? It->second.NonPredCost
                                         : Scaled64::getZero();
        }
        return Scaled64::getZero();
      }

      // getTrueValue()/getFalseValue() returned nullptr, so the value is the
      // binop with the condition replaced by a constant. Estimate its cost as
      // the cost of that operation plus the cost of its non-condition operand,
      // if that operand's cost has already been computed.
      auto *BO = cast<BinaryOperator>(I);
      InstructionCost OpCost = TTI->getArithmeticInstrCost(
          BO->getOpcode(), BO->getType(), TargetTransformInfo::TCK_Latency,
          {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
          {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
      Scaled64 TotalCost = Scaled64::get(OpCost.getValue());
      if (auto *OpI = dyn_cast<Instruction>(BO->getOperand(1 - CondIdx))) {
        auto It = InstCostMap.find(OpI);
        if (It != InstCostMap.end())
          TotalCost += It->second.NonPredCost;
      }
      return TotalCost;
    }
  };


private:
  // Select groups consist of consecutive select-like instructions with the
  // same condition: the shared condition plus the list of select-like
  // instructions that depend on it.
  struct SelectGroup {
    Value *Condition;
    SmallVector<SelectLike, 2> Selects;
  };
  using SelectGroups = SmallVector<SelectGroup, 2>;


  // Convert select instructions to branches when profitable.
  bool optimizeSelects(Function &F);

  // Heuristics for determining which select instructions can be profitably
  // converted to branches. Separate heuristics for selects in inner-most loops
  // and the rest of the code regions (base heuristics for non-inner-most loop
  // regions).
  void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
  void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);

  // Convert the given list of profitable select groups to branches.
  void convertProfitableSIGroups(SelectGroups &ProfSIGroups);

  // Collect the select groups of the given basic block.
  void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);

  // Determine for which select groups it is profitable converting to branches
  // (base and inner-most-loop heuristics).
  void findProfitableSIGroupsBase(SelectGroups &SIGroups,
                                  SelectGroups &ProfSIGroups);
  void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
                                        SelectGroups &ProfSIGroups);

  // Determine if it is profitable to convert the given select group to
  // branches (base heuristics).
  bool isConvertToBranchProfitableBase(const SelectGroup &ASI);

  // Returns true if there are expensive instructions in the cold value
  // operand's (if any) dependence slice of any of the selects of the given
  // group.
  bool hasExpensiveColdOperand(const SelectGroup &ASI);

  // For a given source instruction, collect its backwards dependence slice
  // consisting of instructions exclusively computed for producing the operands
  // of the source instruction.
  void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
                             Instruction *SI, bool ForSinking = false);

  // Returns true if the condition of the select is highly predictable.
  bool isSelectHighlyPredictable(const SelectLike SI);

  // Loop-level checks to determine if a non-predicated version (with branches)
  // of the given loop is more profitable than its predicated version.
  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);

  // Computes instruction and loop-critical-path costs for both the predicated
  // and non-predicated version of the given loop.
  bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
                        DenseMap<const Instruction *, CostInfo> &InstCostMap,
                        CostInfo *LoopCost);

  // Returns a set of all the select-like instructions in the given select
  // groups.
  SmallDenseMap<const Instruction *, SelectLike, 2>
  getSImap(const SelectGroups &SIGroups);

  // Returns a map from select-like instructions to their containing select
  // group.
  SmallDenseMap<const Instruction *, const SelectGroup *, 2>
  getSGmap(const SelectGroups &SIGroups);

  // Returns the latency cost of a given instruction.
  std::optional<uint64_t> computeInstCost(const Instruction *I);

  // Returns the misprediction cost of a given select when converted to branch.
  Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);

  // Returns the cost of a branch when the prediction is correct.
  Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
                                const SelectLike SI);

  // Returns true if the target architecture supports lowering a given select.
  bool isSelectKindSupported(const SelectLike SI);
};


class SelectOptimize : public FunctionPass {
  SelectOptimizeImpl Impl;

public:
  static char ID;

  SelectOptimize() : FunctionPass(ID) {
    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    return Impl.runOnFunction(F, *this);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  }
};

} // namespace


PreservedAnalyses SelectOptimizePass::run(Function &F,
                                          FunctionAnalysisManager &FAM) {
  SelectOptimizeImpl Impl(TM);
  return Impl.run(F, FAM);
}

char SelectOptimize::ID = 0;

INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                    false)

FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }


PreservedAnalyses SelectOptimizeImpl::run(Function &F,
                                          FunctionAnalysisManager &FAM) {
  TSI = TM->getSubtargetImpl(F);
  TLI = TSI->getTargetLowering();

  // If none of the select types are supported then skip this pass.
  // This is an optimization pass. Legality issues will be handled by
  // instruction selection.
  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
    return PreservedAnalyses::all();

  TTI = &FAM.getResult<TargetIRAnalysis>(F);
  if (!TTI->enableSelectOptimize())
    return PreservedAnalyses::all();

  PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  assert(PSI && "This pass requires module analysis pass `profile-summary`!");
  BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);

  // When optimizing for size, selects are preferable over branches.
  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
    return PreservedAnalyses::all();

  LI = &FAM.getResult<LoopAnalysis>(F);
  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  TSchedModel.init(TSI);

  bool Changed = optimizeSelects(F);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}


bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
  TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  TSI = TM->getSubtargetImpl(F);
  TLI = TSI->getTargetLowering();

  // If none of the select types are supported then skip this pass.
  // This is an optimization pass. Legality issues will be handled by
  // instruction selection.
  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
    return false;

  TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  if (!TTI->enableSelectOptimize())
    return false;

  LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
  PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  TSchedModel.init(TSI);

  // When optimizing for size, selects are preferable over branches.
  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
    return false;

  return optimizeSelects(F);
}

416
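// Entry point of the transformation: collect select groups, filter them
// through the base and inner-loop heuristics, and convert the profitable
// groups to explicit branches.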

bool SelectOptimizeImpl::optimizeSelects(Function &F) {
  // Determine for which select groups it is profitable converting to branches.
  SelectGroups ProfSIGroups;
  // Base heuristics apply only to non-inner-most loop code regions.
  optimizeSelectsBase(F, ProfSIGroups);
  // Separate heuristics for inner-most loop code regions.
  optimizeSelectsInnerLoops(F, ProfSIGroups);

  // Convert to branches the select groups that were deemed
  // profitable-to-convert.
  convertProfitableSIGroups(ProfSIGroups);

  // Code modified if at least one select group was converted.
  return !ProfSIGroups.empty();
}

void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
                                             SelectGroups &ProfSIGroups) {
  // Collect all the select groups.
  SelectGroups SIGroups;
  for (BasicBlock &BB : F) {
    // Base heuristics apply only to non-inner-most loop blocks.
    const Loop *L = LI->getLoopFor(&BB);
    if (L && L->isInnermost())
      continue;
    collectSelectGroups(BB, SIGroups);
  }

  // Determine for which select groups it is profitable converting to branches.
  findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
}

void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
                                                   SelectGroups &ProfSIGroups) {
  SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
  // Need to check size on each iteration as we accumulate child loops.
  for (unsigned long i = 0; i < Loops.size(); ++i)
    for (Loop *ChildL : Loops[i]->getSubLoops())
      Loops.push_back(ChildL);

  for (Loop *L : Loops) {
    if (!L->isInnermost())
      continue;

    SelectGroups SIGroups;
    for (BasicBlock *BB : L->getBlocks())
      collectSelectGroups(*BB, SIGroups);

    findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
  }
}

467

468

469

470

471

472

473

474

475

476

477

478

480 SelectOptimizeImpl::SelectLike &SI, bool isTrue,

483 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();

484 if (V) {

486 if (auto It = OptSelects.find(IV); It != OptSelects.end())

487 return isTrue ? It->second.first : It->second.second;

488 return V;

489 }

490

492 assert((BO->getOpcode() == Instruction::Add ||

493 BO->getOpcode() == Instruction::Or ||

494 BO->getOpcode() == Instruction::Sub) &&

495 "Only currently handling Add, Or and Sub binary operators.");

496

497 auto *CBO = BO->clone();

498 auto CondIdx = SI.getConditionOpIndex();

501 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));

502 } else {

504 "Unexpected opcode");

505 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));

506 }

507

508 unsigned OtherIdx = 1 - CondIdx;

510 if (auto It = OptSelects.find(IV); It != OptSelects.end())

511 CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);

512 }

513 CBO->insertBefore(B->getTerminator()->getIterator());

514 return CBO;

515}

516

517void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {

518 for (SelectGroup &ASI : ProfSIGroups) {

    // Transform the select group into explicit control flow: split the block
    // at the last select, sink the dependence slices of the true/false
    // operands into dedicated true/false blocks, branch on the (frozen)
    // condition, and merge the results back with PHI nodes in the tail block.

558 typedef std::stack<Instruction *>::size_type StackSizeType;

559 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;

560 for (SelectLike &SI : ASI.Selects) {

562 continue;

563

564

566 std::stack<Instruction *> TrueSlice;

567 getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);

568 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());

570 }

573 std::stack<Instruction *> FalseSlice;

574 getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);

575 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());

576 FalseSlices.push_back(FalseSlice);

577 }

578 }

579 }

580

581

582

583

584

585

586

587

588

589

591 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {

592 for (auto &S : TrueSlices) {

593 if (!S.empty()) {

594 TrueSlicesInterleaved.push_back(S.top());

595 S.pop();

596 }

597 }

598 }

599 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {

600 for (auto &S : FalseSlices) {

601 if (!S.empty()) {

602 FalseSlicesInterleaved.push_back(S.top());

603 S.pop();

604 }

605 }

606 }

607

608

609 SelectLike &SI = ASI.Selects.front();

610 SelectLike &LastSI = ASI.Selects.back();

611 BasicBlock *StartBlock = SI.getI()->getParent();

613

614

615

616

617

618 SplitPt.setHeadBit(true);

621

623

624

625

627 auto DIt = SI.getI()->getIterator();

628 auto NIt = ASI.Selects.begin();

629 while (&*DIt != LastSI.getI()) {

630 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())

631 ++NIt;

632 else

634 DIt++;

635 }

637 for (auto *DI : SinkInstrs)

638 DI->moveBeforePreserving(InsertionPoint);

639

640

641

642 auto TransferDbgRecords = [&](Instruction &I) {

648 }

649 };

650

651

652

653

654 auto R = make_range(std::next(SI.getI()->getIterator()),

655 std::next(LastSI.getI()->getIterator()));

657

658

659

660 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;

661 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;

662

663 auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {

664 for (auto &SL : SG.Selects) {

665 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)

666 return true;

667 }

668 return false;

669 };

670 if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {

672 EndBlock->getParent(), EndBlock);

674 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());

675 for (Instruction *TrueInst : TrueSlicesInterleaved)

676 TrueInst->moveBefore(TrueBranch->getIterator());

677 }

678 if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {

679 FalseBlock =

681 EndBlock->getParent(), EndBlock);

683 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());

684 for (Instruction *FalseInst : FalseSlicesInterleaved)

685 FalseInst->moveBefore(FalseBranch->getIterator());

686 }

687

688

689 if (TrueBlock == FalseBlock) {

690 assert(TrueBlock == nullptr &&

691 "Unexpected basic block transform while optimizing select");

692

694 EndBlock->getParent(), EndBlock);

696 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());

697 }

698

699

700

701

702

703

705 if (TrueBlock == nullptr) {

706 TT = EndBlock;

707 FT = FalseBlock;

708 TrueBlock = StartBlock;

709 } else if (FalseBlock == nullptr) {

710 TT = TrueBlock;

711 FT = EndBlock;

712 FalseBlock = StartBlock;

713 } else {

714 TT = TrueBlock;

715 FT = FalseBlock;

716 }

718 auto *CondFr =

719 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");

720

722

723

724

725

726 InsertionPoint = EndBlock->begin();

727 for (SelectLike &SI : ASI.Selects) {

728

732

733

735 for (auto &SG : ProfSIGroups) {

736 if (SG.Condition == SI.getI())

737 SG.Condition = PN;

738 }

739 }

740 SI.getI()->replaceAllUsesWith(PN);

743 INS[PN] = {TV, FV};

747 ++NumSelectsConverted;

748 }

749 IB.CreateCondBr(CondFr, TT, FT, SI.getI());

750

751

752 for (SelectLike &SI : ASI.Selects)

753 SI.getI()->eraseFromParent();

754 }

755}

756
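// Collect groups of consecutive select-like instructions in a basic block
// that share the same condition. Zext/sext of the condition (and left-shifts
// of such extensions) are tracked as auxiliary instructions so that add/sub/or
// instructions using them can be treated as selects as well.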

757void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,

758 SelectGroups &SIGroups) {

759

760

761

762

763

764

765

766

767

768 struct SelectLikeInfo {

770 bool IsAuxiliary;

771 bool IsInverted;

772 unsigned ConditionIdx;

773 };

774

776

777

779

780

781

782

783 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {

786 return SelectInfo.end();

787 }

788

791 Cond->getType()->isIntegerTy(1)) {

793 return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;

794 }

795

797 return SelectInfo.insert({I, {Cond, true, true, 0}}).first;

798 }

799

800

803 return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;

804 }

808 I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {

809 for (auto *CmpI : SeenCmp) {

810 auto Pred = CmpI->getPredicate();

811 if (Val != CmpI->getOperand(0))

812 continue;

821 bool Inverted =

823 return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;

824 }

825 }

826 return SelectInfo.end();

827 }

828

829

830

831

832

834 auto MatchZExtOrSExtPattern =

836 auto MatchShiftPattern =

838

839

840

841 if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||

842 (match(I, MatchShiftPattern) &&

843 X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {

844 if (I->getOpcode() != Instruction::Add &&

845 I->getOpcode() != Instruction::Sub &&

846 I->getOpcode() != Instruction::Or)

847 return SelectInfo.end();

848

849 if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))

850 return SelectInfo.end();

851

852

853

854

855

856 unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;

857 for (; Idx < 2; Idx++) {

858 auto *Op = I->getOperand(Idx);

859 auto It = SelectInfo.find(Op);

860 if (It != SelectInfo.end() && It->second.IsAuxiliary) {

861 Cond = It->second.Cond;

862 bool Inverted = It->second.IsInverted;

863 return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;

864 }

865 }

866 }

867 return SelectInfo.end();

868 };

869

870 bool AlreadyProcessed = false;

873 while (BBIt != BB.end()) {

875 if (I->isDebugOrPseudoInst())

876 continue;

877

878 if (!AlreadyProcessed)

879 It = ProcessSelectInfo(I);

880 else

881 AlreadyProcessed = false;

882

883 if (It == SelectInfo.end() || It->second.IsAuxiliary)

884 continue;

885

886 if (TTI->shouldTreatInstructionLikeSelect(I))

887 continue;

888

890

891 if (Cond->getType()->isIntegerTy(1))

892 continue;

893

894 SelectGroup SIGroup = {Cond, {}};

895 SIGroup.Selects.emplace_back(I, It->second.IsInverted,

896 It->second.ConditionIdx);

897

898

899

900 if (!isSelectKindSupported(SIGroup.Selects.front()))

901 continue;

902

903 while (BBIt != BB.end()) {

905

906

908 ++BBIt;

909 continue;

910 }

911

912 It = ProcessSelectInfo(NI);

913 if (It == SelectInfo.end()) {

914 AlreadyProcessed = true;

915 break;

916 }

917

918

919 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;

920 if (Cond != CurrCond) {

921 AlreadyProcessed = true;

922 break;

923 }

924

925 if (!IsAux)

926 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);

927 ++BBIt;

928 }

930 dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";

931 for (auto &SI : SIGroup.Selects)

932 dbgs() << " " << *SI.getI() << "\n";

933 });

934

935 SIGroups.push_back(SIGroup);

936 }

937}

938
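// Keep only the select groups that the base (non-loop) heuristic considers
// profitable to convert to branches.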

939void SelectOptimizeImpl::findProfitableSIGroupsBase(

940 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {

941 for (SelectGroup &ASI : SIGroups) {

942 ++NumSelectOptAnalyzed;

943 if (isConvertToBranchProfitableBase(ASI))

944 ProfSIGroups.push_back(ASI);

945 }

946}

947

953

954void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(

955 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {

956 NumSelectOptAnalyzed += SIGroups.size();

957

958

959

960

961

962

963

964

965

966

968 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},

969 {Scaled64::getZero(), Scaled64::getZero()}};

970 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||

971 !checkLoopHeuristics(L, LoopCost)) {

972 return;

973 }

974

975 for (SelectGroup &ASI : SIGroups) {

976

977

978 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();

979 for (SelectLike &SI : ASI.Selects) {

980 const auto &ICM = InstCostMap[SI.getI()];

981 SelectCost = std::max(SelectCost, ICM.PredCost);

982 BranchCost = std::max(BranchCost, ICM.NonPredCost);

983 }

984 if (BranchCost < SelectCost) {

986 ASI.Selects.front().getI());

987 OR << "Profitable to convert to branch (loop analysis). BranchCost="

988 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()

989 << ". ";

991 ++NumSelectConvertedLoop;

992 ProfSIGroups.push_back(ASI);

993 } else {

995 ASI.Selects.front().getI());

996 ORmiss << "Select is more profitable (loop analysis). BranchCost="

997 << BranchCost.toString()

998 << ", SelectCost=" << SelectCost.toString() << ". ";

1000 }

1001 }

1002}

1003

1004bool SelectOptimizeImpl::isConvertToBranchProfitableBase(

1005 const SelectGroup &ASI) {

1006 const SelectLike &SI = ASI.Selects.front();

1007 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()

1008 << "\n");

1011

1012

1013 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {

1014 ++NumSelectColdBB;

1015 ORmiss << "Not converted to branch because of cold basic block. ";

1017 return false;

1018 }

1019

1020

1021 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {

1022 ++NumSelectUnPred;

1023 ORmiss << "Not converted to branch because of unpredictable branch. ";

1025 return false;

1026 }

1027

1028

1029

1031 ++NumSelectConvertedHighPred;

1032 OR << "Converted to branch because of highly predictable branch. ";

1034 return true;

1035 }

1036

1037

1038

1039 if (hasExpensiveColdOperand(ASI)) {

1040 ++NumSelectConvertedExpColdOperand;

1041 OR << "Converted to branch because of expensive cold operand.";

1043 return true;

1044 }

1045

1046

1047

1048

1049 auto *BB = SI.getI()->getParent();

1051 if (L && L->isInnermost() && L->getLoopLatch() == BB &&

1052 ASI.Selects.size() >= 3) {

1053 OR << "Converted to branch because select group in the latch block is big.";

1055 return true;

1056 }

1057

1058 ORmiss << "Not profitable to convert to branch (base heuristic).";

1060 return false;

1061}

1062

static InstructionCost divideNearest(const InstructionCost &Numerator,
                                     uint64_t Denominator) {
  return (Numerator + (Denominator / 2)) / Denominator;
}

1067

1074

1075bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {

1076 bool ColdOperand = false;

1077 uint64_t TrueWeight, FalseWeight, TotalWeight;

1079 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);

1080 TotalWeight = TrueWeight + FalseWeight;

1081

1085 ASI.Selects.front().getI());

1086 ORmiss << "Profile data available but missing branch-weights metadata for "

1087 "select instruction. ";

1089 }

1090 if (!ColdOperand)

1091 return false;

1092

1093

1094 for (SelectLike SI : ASI.Selects) {

1096 uint64_t HotWeight;

1097 if (TrueWeight < FalseWeight) {

1099 HotWeight = FalseWeight;

1100 } else {

1102 HotWeight = TrueWeight;

1103 }

1104 if (ColdI) {

1105 std::stack<Instruction *> ColdSlice;

1106 getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());

1108 while (!ColdSlice.empty()) {

1109 SliceCost += TTI->getInstructionCost(ColdSlice.top(),

1111 ColdSlice.pop();

1112 }

1113

1114

1115

1116

1118 divideNearest(SliceCost * HotWeight, TotalWeight);

1119 if (AdjSliceCost >=

1121 return true;

1122 }

1123 }

1124 return false;

1125}

1126

// Check if it is safe to move LoadI next to the SI instruction.
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
  // For now, assume it is safe only if there is no instruction modifying
  // memory in-between the load and the select instruction.
  if (LoadI->getParent() != SI->getParent())
    return false;
  auto It = LoadI->getIterator();
  while (&*It != SI) {
    if (It->mayWriteToMemory())
      return false;
    It++;
  }
  return true;
}

1142

1143

1144

1145

1146

1147

1148

1149void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,

1150 std::stack<Instruction *> &Slice,

1152 bool ForSinking) {

1154 std::queue<Instruction *> Worklist;

1155 Worklist.push(I);

1156 while (!Worklist.empty()) {

1158 Worklist.pop();

1159

1160

1161 if (!Visited.insert(II).second)

1162 continue;

1163

1164 if (II->hasOneUse())

1165 continue;

1166

1167

1168

1169

1170 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||

1172 continue;

1173

1174

1175

1176

1177

1179 continue;

1180

1181

1182

1184 continue;

1185

1186

1187 Slice.push(II);

1188

1189

1190 for (Value *Op : II->operand_values())

1192 Worklist.push(OpI);

1193 }

1194}

1195
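// A select is highly predictable when its branch weights are strongly biased
// toward one side, beyond the target's predictable-branch threshold.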

bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
  uint64_t TrueWeight, FalseWeight;
  if (extractBranchWeights(*SI.getI(), TrueWeight, FalseWeight)) {
    uint64_t Max = std::max(TrueWeight, FalseWeight);
    uint64_t Sum = TrueWeight + FalseWeight;
    if (Sum != 0) {
      auto Probability = BranchProbability::getBranchProbability(Max, Sum);
      if (Probability > TTI->getPredictableBranchThreshold())
        return true;
    }
  }
  return false;
}

1209

1210bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,

1211 const CostInfo LoopCost[2]) {

1212

1213

1214

1216 return true;

1217

1219 &*L->getHeader()->getFirstNonPHIIt());

1220

1221 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||

1222 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {

1223 ORmissL << "No select conversion in the loop due to no reduction of loop's "

1224 "critical path. ";

1226 return false;

1227 }

1228

1229 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,

1230 LoopCost[1].PredCost - LoopCost[1].NonPredCost};

1231

1232

1233

1234

1237 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;

1238 ORmissL << "No select conversion in the loop due to small reduction of "

1239 "loop's critical path. Gain="

1240 << Gain[1].toString()

1241 << ", RelativeGain=" << RelativeGain.toString() << "%. ";

1243 return false;

1244 }

1245

1246

1247

1248

1249

1250

1251 if (Gain[1] > Gain[0]) {

1252 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /

1253 (LoopCost[1].PredCost - LoopCost[0].PredCost);

1255 ORmissL << "No select conversion in the loop due to small gradient gain. "

1256 "GradientGain="

1257 << GradientGain.toString() << "%. ";

1259 return false;

1260 }

1261 }

1262

1263 else if (Gain[1] < Gain[0]) {

1264 ORmissL

1265 << "No select conversion in the loop due to negative gradient gain. ";

1267 return false;

1268 }

1269

1270

1271

1272 return true;

1273}

1274

1275

1276

1277

1278

1279bool SelectOptimizeImpl::computeLoopCosts(

1280 const Loop *L, const SelectGroups &SIGroups,

1282 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "

1283 << L->getHeader()->getName() << "\n");

1284 const auto SImap = getSImap(SIGroups);

1285 const auto SGmap = getSGmap(SIGroups);

1286

1287

1288 const unsigned Iterations = 2;

1289 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {

1290

1291 CostInfo &MaxCost = LoopCost[Iter];

1294 if (I.isDebugOrPseudoInst())

1295 continue;

1296

1297 Scaled64 IPredCost = Scaled64::getZero(),

1298 INonPredCost = Scaled64::getZero();

1299

1300

1301

1302

1303 for (const Use &U : I.operands()) {

1305 if (!UI)

1306 continue;

1307 if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) {

1308 IPredCost = std::max(IPredCost, It->second.PredCost);

1309 INonPredCost = std::max(INonPredCost, It->second.NonPredCost);

1310 }

1311 }

1312 auto ILatency = computeInstCost(&I);

1313 if (!ILatency) {

1315 ORmissL << "Invalid instruction cost preventing analysis and "

1316 "optimization of the inner-most loop containing this "

1317 "instruction. ";

1319 return false;

1320 }

1321 IPredCost += Scaled64::get(*ILatency);

1322 INonPredCost += Scaled64::get(*ILatency);

1323

1324

1325

1326

1327

1328

1329

1330 if (auto It = SImap.find(&I); It != SImap.end()) {

1331 auto SI = It->second;

1332 const auto *SG = SGmap.at(&I);

1333 Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);

1334 Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);

1335 Scaled64 PredictedPathCost =

1336 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);

1337

1338 Scaled64 CondCost = Scaled64::getZero();

1340 if (auto It = InstCostMap.find(CI); It != InstCostMap.end())

1341 CondCost = It->second.NonPredCost;

1342 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);

1343

1344 INonPredCost = PredictedPathCost + MispredictCost;

1345 }

1346 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"

1347 << INonPredCost << " for " << I << "\n");

1348

1349 InstCostMap[&I] = {IPredCost, INonPredCost};

1350 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);

1351 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);

1352 }

1353 }

1355 << " MaxCost = " << MaxCost.PredCost << " "

1356 << MaxCost.NonPredCost << "\n");

1357 }

1358 return true;

1359}

1360

SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2>
SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
  SmallDenseMap<const Instruction *, SelectLike, 2> SImap;
  for (const SelectGroup &ASI : SIGroups)
    for (const SelectLike &SI : ASI.Selects)
      SImap.try_emplace(SI.getI(), SI);
  return SImap;
}

SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2>
SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
  SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap;
  for (const SelectGroup &ASI : SIGroups)
    for (const SelectLike &SI : ASI.Selects)
      SImap.try_emplace(SI.getI(), &ASI);
  return SImap;
}

1378

std::optional<uint64_t>
SelectOptimizeImpl::computeInstCost(const Instruction *I) {
  InstructionCost ICost =
      TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
  if (ICost.isValid())
    return std::optional<uint64_t>(ICost.getValue());
  return std::nullopt;
}

1387
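// Estimate the misprediction cost of converting a select to a branch: the
// larger of the target's misprediction penalty and the condition's computation
// latency, scaled by the expected misprediction rate.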

SelectOptimizeImpl::Scaled64
SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
                                         const Scaled64 CondCost) {
  uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;

  // Account for the default misprediction rate when using a branch
  // (conservatively set to 25% by default).
  uint64_t MispredictRate = MispredictDefaultRate;

  // If the select condition is obviously predictable, then the misprediction
  // rate is zero.
  if (isSelectHighlyPredictable(SI))
    MispredictRate = 0;

  // CondCost is included to account for cases where the computation of the
  // condition is part of any critical path.
  Scaled64 MispredictCost =
      std::max(Scaled64::get(MispredictPenalty), CondCost) *
      Scaled64::get(MispredictRate);
  MispredictCost /= Scaled64::get(100);

  return MispredictCost;
}

1411

// Returns the cost of a branch when the prediction is correct:
// TrueCost * TrueProbability + FalseCost * FalseProbability.
SelectOptimizeImpl::Scaled64
SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
                                         const SelectLike SI) {
  Scaled64 PredPathCost;
  uint64_t TrueWeight, FalseWeight;
  if (extractBranchWeights(*SI.getI(), TrueWeight, FalseWeight)) {
    uint64_t SumWeight = TrueWeight + FalseWeight;
    if (SumWeight != 0) {
      PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
                     FalseCost * Scaled64::get(FalseWeight);
      PredPathCost /= Scaled64::get(SumWeight);
      return PredPathCost;
    }
  }
  // Without branch weight metadata, we assume the highest cost path by
  // default, weighted 3:1 against the cheaper path.
  PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
                          FalseCost * Scaled64::get(3) + TrueCost);
  PredPathCost /= Scaled64::get(4);
  return PredPathCost;
}

1435

bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
  TargetLowering::SelectSupportKind SelectKind;
  if (SI.getType()->isVectorTy())
    SelectKind = TargetLowering::ScalarCondVectorVal;
  else
    SelectKind = TargetLowering::ScalarValSelect;
  return TLI->isSelectSupported(SelectKind);
}
