LLVM: lib/Transforms/Scalar/SimpleLoopUnswitch.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

62#include

63#include

64#include

65#include

66#include

67#include

68

69#define DEBUG_TYPE "simple-loop-unswitch"

70

71using namespace llvm;

73

74STATISTIC(NumBranches, "Number of branches unswitched");

75STATISTIC(NumSwitches, "Number of switches unswitched");

76STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");

77STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");

78STATISTIC(NumTrivial, "Number of unswitches that are trivial");

80 NumCostMultiplierSkipped,

81 "Number of unswitch candidates that had their cost multiplier skipped");

83 "Number of invariant conditions injected and unswitched");

84

87 cl::desc("Forcibly enables non-trivial loop unswitching rather than "

88 "following the configuration passed into the pass."));

89

92 cl::desc("The cost threshold for unswitching a loop."));

93

96 cl::desc("Enable unswitch cost multiplier that prohibits exponential "

97 "explosion in nontrivial unswitch."));

100 cl::desc("Toplevel siblings divisor for cost multiplier."));

103 cl::desc("Number of unswitch candidates that are ignored when calculating "

104 "cost multiplier."));

107 cl::desc("If enabled, simple loop unswitching will also consider "

108 "llvm.experimental.guard intrinsics as unswitch candidates."));

110 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",

112 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "

113 "null checks to save time analyzing if we can keep it."));

116 cl::desc("Max number of memory uses to explore during "

117 "partial unswitching analysis"),

121 cl::desc("If enabled, the freeze instruction will be added to condition "

122 "of loop unswitch to prevent miscompilation."));

123

125 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,

126 cl::desc("Whether we should inject new invariants and unswitch them to "

127 "eliminate some existing (non-invariant) conditions."),

129

131 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",

132 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "

133 "unswitch on them to eliminate branches that are "

134 "not-taken 1/ times or less."),

136

138namespace {

139struct CompareDesc {

141 Value *Invariant;

143

145 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}

146};

147

148struct InjectedInvariant {

153

156 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}

157};

158

159struct NonTrivialUnswitchCandidate {

162 std::optional Cost;

163 std::optional PendingInjection;

164 NonTrivialUnswitchCandidate(

166 std::optional Cost = std::nullopt,

167 std::optional PendingInjection = std::nullopt)

168 : TI(TI), Invariants(Invariants), Cost(Cost),

169 PendingInjection(PendingInjection) {};

170

171 bool hasPendingInjection() const { return PendingInjection.has_value(); }

172};

173}

174

175

176

177

181 Cond = CondNext;

183}

184

185

186

187

188

189

190

191

195 assert(!L.isLoopInvariant(&Root) &&

196 "Only need to walk the graph if root itself is not invariant.");

198

201

202

206 Visited.insert(&Root);

207 do {

209 for (Value *OpV : I.operand_values()) {

210

211 if (isa(OpV))

212 continue;

213

214

215 if (L.isLoopInvariant(OpV)) {

217 continue;

218 }

219

220

222

225

226 if (Visited.insert(OpI).second)

228 }

229 }

230 } while (!Worklist.empty());

231

232 return Invariants;

233}

234

237 assert(!isa(Invariant) && "Why are we unswitching on a constant?");

238

239

240

242 Instruction *UserI = dyn_cast(U.getUser());

243

244

245 if (UserI && L.contains(UserI))

246 U.set(&Replacement);

247 }

248}

249

250

251

256 auto *PN = dyn_cast(&I);

257 if (!PN)

258

259 return true;

260

261

262

263 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))

264 return false;

265 }

267}

268

269

270

271

277

279 for (Value *Inv : Invariants) {

281 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");

283 }

284

286 : IRB.CreateAnd(FrozenInvariants);

288 Direction ? &NormalSucc : &UnswitchedSucc);

289}

290

291

297 for (auto *Val : reverse(ToDuplicate)) {

298 Instruction *Inst = cast(Val);

303 VMap[Val] = NewInst;

304

305 if (!MSSAU)

306 continue;

307

309 if (auto *MemUse =

310 dyn_cast_or_null(MSSA->getMemoryAccess(Inst))) {

311 auto *DefiningAccess = MemUse->getDefiningAccess();

312

313 while (L.contains(DefiningAccess->getBlock())) {

314

315

316 if (auto *MemPhi = dyn_cast(DefiningAccess))

317 DefiningAccess =

318 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());

319 else

320 DefiningAccess = cast(DefiningAccess)->getDefiningAccess();

321 }

325 }

326 }

327

329 Value *Cond = VMap[ToDuplicate[0]];

331 Direction ? &NormalSucc : &UnswitchedSucc);

332}

333

334

335

336

337

338

339

340

344 for (PHINode &PN : UnswitchedBB.phis()) {

345

346

347

348 for (auto i : seq(0, PN.getNumOperands())) {

349 assert(PN.getIncomingBlock(i) == &OldExitingBB &&

350 "Found incoming block different from unique predecessor!");

351 PN.setIncomingBlock(i, &OldPH);

352 }

353 }

354}

355

356

357

358

359

360

361

362

367 bool FullUnswitch) {

368 assert(&ExitBB != &UnswitchedBB &&

369 "Must have different loop exit and unswitched blocks!");

372 auto *NewPN = PHINode::Create(PN.getType(), 2,

373 PN.getName() + ".split");

374 NewPN->insertBefore(InsertPt);

375

376

377

378

379

380

381

382

383

384

385 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {

386 if (PN.getIncomingBlock(i) != &OldExitingBB)

387 continue;

388

390 if (FullUnswitch)

391

392 PN.removeIncomingValue(i);

393

394 NewPN->addIncoming(Incoming, &OldPH);

395 }

396

397

398

399 PN.replaceAllUsesWith(NewPN);

400 NewPN->addIncoming(&PN, &ExitBB);

401 }

402}

403

404

405

406

407

408

412

413 Loop *OldParentL = L.getParentLoop();

414 if (!OldParentL)

415 return;

416

418 L.getExitBlocks(Exits);

419 Loop *NewParentL = nullptr;

420 for (auto *ExitBB : Exits)

422 if (!NewParentL || NewParentL->contains(ExitL))

423 NewParentL = ExitL;

424

425 if (NewParentL == OldParentL)

426 return;

427

428

429 if (NewParentL)

431 "Can only hoist this loop up the nest!");

432

433

434

436 "Parent loop of this loop should contain this loop's preheader!");

438

439

441

442

443 if (NewParentL)

445 else

447

448

449

450

451 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;

452 OldContainingL = OldContainingL->getParentLoop()) {

455 return BB == &Preheader || L.contains(BB);

456 });

457

458 OldContainingL->getBlocksSet().erase(&Preheader);

460 OldContainingL->getBlocksSet().erase(BB);

461

462

463

464

465 formLCSSA(*OldContainingL, DT, &LI, SE);

466

467

468

469

470

471

473 true);

474 }

475}

476

477

478

479

483 Loop *Current = TopMost;

484 while (Current) {

486 TopMost = Current;

488 }

489 return TopMost;

490}

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

513 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");

514

515

517

518

519

520 bool FullUnswitch = false;

521

523 if (L.isLoopInvariant(Cond)) {

525 FullUnswitch = true;

526 } else {

527 if (auto *CondInst = dyn_cast(Cond))

529 if (Invariants.empty()) {

530 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");

531 return false;

532 }

533 }

534

535

536 bool ExitDirection = true;

537 int LoopExitSuccIdx = 0;

539 if (L.contains(LoopExitBB)) {

540 ExitDirection = false;

541 LoopExitSuccIdx = 1;

543 if (L.contains(LoopExitBB)) {

544 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");

545 return false;

546 }

547 }

548 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);

551 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");

552 return false;

553 }

554

555

556

557

558

559

560 if (!FullUnswitch) {

563 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "

564 "non-full unswitch!\n");

565 return false;

566 }

567 }

568

570 dbgs() << " unswitching trivial invariant conditions for: " << BI

571 << "\n";

572 for (Value *Invariant : Invariants) {

573 dbgs() << " " << *Invariant << " == true";

574 if (Invariant != Invariants.back())

575 dbgs() << " ||";

576 dbgs() << "\n";

577 }

578 });

579

580

581

582

583 if (SE) {

586 else

587

590 }

591

594

595

596

597

598 BasicBlock *OldPH = L.getLoopPreheader();

600

601

602

603

604

606 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {

607 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&

608 "A branch's parent isn't a predecessor!");

609 UnswitchedBB = LoopExitBB;

610 } else {

611 UnswitchedBB =

612 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);

613 }

614

617

618

619

620

622 if (FullUnswitch) {

623

624

625

628 if (MSSAU) {

629

630

632 } else {

633

634

637 }

638 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);

640 } else {

641

642

643 if (ExitDirection)

645 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "

646 "condition!");

647 else

649 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"

650 " condition!");

652 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,

654 }

655

656

658

659

660

661 if (MSSAU) {

663 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});

665 }

666

667

668 if (FullUnswitch) {

669 if (MSSAU) {

670 Instruction *Term = ParentBB->getTerminator();

671

672

675 Term->eraseFromParent();

676 MSSAU->removeEdge(ParentBB, LoopExitBB);

677 }

679 }

680

683

684

685 if (UnswitchedBB == LoopExitBB)

687 else

689 *ParentBB, *OldPH, FullUnswitch);

690

691

692

693

697

698

699

700 for (Value *Invariant : Invariants)

702

703

704

705 if (FullUnswitch)

707

710

711 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");

712 ++NumTrivial;

713 ++NumBranches;

714 return true;

715}

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

746 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");

747 Value *LoopCond = SI.getCondition();

748

749

750 if (!L.isLoopInvariant(LoopCond))

751 return false;

752

753 auto *ParentBB = SI.getParent();

754

755

756

757

758

759

760 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {

761

762 if (L.contains(&BBToCheck))

763 return false;

764

766 return false;

767

768

769

770

771 auto *TI = BBToCheck.getTerminator();

772 bool isUnreachable = isa(TI);

773 return !isUnreachable || BBToCheck.getFirstNonPHIOrDbg() != TI;

774 };

775

777 for (auto Case : SI.cases())

778 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))

779 ExitCaseIndices.push_back(Case.getCaseIndex());

783 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {

784 DefaultExitBB = SI.getDefaultDest();

785 } else if (ExitCaseIndices.empty())

786 return false;

787

788 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");

789

792

793

794

795 Loop *OuterL = &L;

796

797 if (DefaultExitBB) {

798

800 if (!ExitL || ExitL->contains(OuterL))

801 OuterL = ExitL;

802 }

803 for (unsigned Index : ExitCaseIndices) {

804 auto CaseI = SI.case_begin() + Index;

805

807 if (!ExitL || ExitL->contains(OuterL))

808 OuterL = ExitL;

809 }

810

811 if (SE) {

812 if (OuterL)

814 else

816 }

817

818 if (DefaultExitBB) {

819

820

821 SI.setDefaultDest(nullptr);

822 }

823

824

825

828 4> ExitCases;

829 ExitCases.reserve(ExitCaseIndices.size());

831

832

833 for (unsigned Index : reverse(ExitCaseIndices)) {

834 auto CaseI = SI.case_begin() + Index;

835

837 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);

838

840 }

841

842

843

845 if (SI.getNumCases() > 0 &&

847 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();

848 }))

849 CommonSuccBB = SI.case_begin()->getCaseSuccessor();

850 if (!DefaultExitBB) {

851

852

853

854 if (SI.getNumCases() == 0)

855 CommonSuccBB = SI.getDefaultDest();

856 else if (SI.getDefaultDest() != CommonSuccBB)

857 CommonSuccBB = nullptr;

858 }

859

860

861

862 BasicBlock *OldPH = L.getLoopPreheader();

865

866

867

868

872

873

874

875

876

877

878

881

882

883

884 if (DefaultExitBB) {

886 UnswitchedExitBBs.insert(DefaultExitBB);

888 } else {

889 auto *SplitBB =

890 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);

892 *ParentBB, *OldPH,

893 true);

894 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;

895 }

896 }

897

898

899 for (auto &ExitCase : reverse(ExitCases)) {

900

901 BasicBlock *ExitBB = std::get<1>(ExitCase);

902

903

904

906

907 if (UnswitchedExitBBs.insert(ExitBB).second)

909 continue;

910 }

911

912

913

914 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];

915 if (!SplitExitBB) {

916

917 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);

919 *ParentBB, *OldPH,

920 true);

921 }

922

923 std::get<1>(ExitCase) = SplitExitBB;

924 }

925

926

927

928 for (auto &ExitCase : reverse(ExitCases)) {

929 ConstantInt *CaseVal = std::get<0>(ExitCase);

930 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);

931

932 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));

933 }

934

935

936

937 if (DefaultExitBB) {

940

941

942

943 for (const auto &Case : SI.cases())

946 } else if (DefaultCaseWeight) {

947

948 uint64_t SW = *DefaultCaseWeight;

949 for (const auto &Case : SI.cases()) {

952 "case weight must be defined as default case weight is defined");

953 SW += *W;

954 }

956 }

957

958

959

960

961

962 if (CommonSuccBB) {

964

965

966

967 bool SkippedFirst = DefaultExitBB == nullptr;

968 for (auto Case : SI.cases()) {

970 "Non-common successor!");

971 (void)Case;

972 if (!SkippedFirst) {

973 SkippedFirst = true;

974 continue;

975 }

977 true);

978 }

979

983 } else if (DefaultExitBB) {

984 assert(SI.getNumCases() > 0 &&

985 "If we had no cases we'd have a common successor!");

986

987

988

989

990 auto LastCaseI = std::prev(SI.case_end());

991

992 SI.setDefaultDest(LastCaseI->getCaseSuccessor());

996 }

997

998

999

1000

1001

1003 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {

1004 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});

1005 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});

1006 }

1007 for (auto SplitUnswitchedPair : SplitExitBBMap) {

1008 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});

1009 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});

1010 }

1011

1012 if (MSSAU) {

1013 MSSAU->applyUpdates(DTUpdates, DT, true);

1016 } else {

1018 }

1019

1020 assert(DT.verify(DominatorTree::VerificationLevel::Fast));

1021

1022

1023

1025

1028

1029 ++NumTrivial;

1030 ++NumSwitches;

1031 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");

1032 return true;

1033}

1034

1035

1036

1037

1038

1039

1040

1041

1042

1043

1044

1045

1046

1050 bool Changed = false;

1051

1052

1053

1054

1055

1056

1057

1058

1059

1060

1061

1062

1063 BasicBlock *CurrentBB = L.getHeader();

1065 Visited.insert(CurrentBB);

1066 do {

1067

1068

1069

1070 if (MSSAU)

1072 if (!isa(*Defs->begin()) || (++Defs->begin() != Defs->end()))

1073 return Changed;

1075 [](Instruction &I) { return I.mayHaveSideEffects(); }))

1076 return Changed;

1077

1079

1080 if (auto *SI = dyn_cast(CurrentTerm)) {

1081

1082

1083

1084 if (isa(SI->getCondition()))

1085 return Changed;

1086

1088

1089 return Changed;

1090

1091

1092 Changed = true;

1093

1094

1095

1096

1097

1098 auto *BI = dyn_cast(CurrentBB->getTerminator());

1099 if (!BI || BI->isConditional())

1100 return Changed;

1101

1102 CurrentBB = BI->getSuccessor(0);

1103 continue;

1104 }

1105

1106 auto *BI = dyn_cast(CurrentTerm);

1107 if (!BI)

1108

1109 return Changed;

1110

1111

1112

1113

1114 if (!BI->isConditional() ||

1116 return Changed;

1117

1118

1119

1121 return Changed;

1122

1123

1124 Changed = true;

1125

1126

1127

1128 BI = cast(CurrentBB->getTerminator());

1129 if (BI->isConditional())

1130 return Changed;

1131

1132

1133 CurrentBB = BI->getSuccessor(0);

1134

1135

1136

1137

1138 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);

1139

1140 return Changed;

1141}

1142

1143

1144

1145

1146

1147

1148

1149

1150

1151

1152

1153

1154

1155

1156

1157

1158

1159

1160

1161

1162

1163

1164

1165

1176 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());

1177

1178

1179

1180 auto CloneBlock = [&](BasicBlock *OldBB) {

1181

1184

1185

1187 VMap[OldBB] = NewBB;

1188

1189 return NewBB;

1190 };

1191

1192

1193

1194 auto SkipBlock = [&](BasicBlock *BB) {

1195 auto It = DominatingSucc.find(BB);

1196 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;

1197 };

1198

1199

1200 auto *ClonedPH = CloneBlock(LoopPH);

1201

1202

1203 for (auto *LoopBB : L.blocks())

1204 if (!SkipBlock(LoopBB))

1205 CloneBlock(LoopBB);

1206

1207

1208

1209

1210 for (auto *ExitBB : ExitBlocks) {

1211 if (SkipBlock(ExitBB))

1212 continue;

1213

1214

1215

1216

1217

1218

1219 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);

1220

1221

1222

1223

1224 MergeBB->takeName(ExitBB);

1225 ExitBB->setName(Twine(MergeBB->getName()) + ".split");

1226

1227

1228 auto *ClonedExitBB = CloneBlock(ExitBB);

1229 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&

1230 "Exit block should have been split to have one successor!");

1231 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&

1232 "Cloned exit block has the wrong successor!");

1233

1234

1236 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),

1238 std::prev(ClonedExitBB->end())))) {

1240 Instruction &ClonedI = std::get<1>(ZippedInsts);

1241

1242

1243

1245 (isa(I) || isa(I) || isa(I)) &&

1246 "Bad instruction in exit block!");

1247

1248 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");

1249

1250

1251 if (SE)

1252 if (auto *PN = dyn_cast(&I))

1254

1256

1257 auto *MergePN =

1258 PHINode::Create(I.getType(), 2, ".us-phi");

1259 MergePN->insertBefore(InsertPt);

1260 MergePN->setDebugLoc(InsertPt->getDebugLoc());

1261 I.replaceAllUsesWith(MergePN);

1262 MergePN->addIncoming(&I, ExitBB);

1263 MergePN->addIncoming(&ClonedI, ClonedExitBB);

1264 }

1265 }

1266

1267

1268

1269

1270

1271

1272 Module *M = ClonedPH->getParent()->getParent();

1273 for (auto *ClonedBB : NewBlocks)

1279 if (auto *II = dyn_cast(&I))

1281 }

1282

1283

1284

1285 for (auto *LoopBB : L.blocks())

1286 if (SkipBlock(LoopBB))

1287 for (auto *SuccBB : successors(LoopBB))

1288 if (auto *ClonedSuccBB = cast_or_null(VMap.lookup(SuccBB)))

1289 for (PHINode &PN : ClonedSuccBB->phis())

1290 PN.removeIncomingValue(LoopBB, false);

1291

1292

1293

1294 auto *ClonedParentBB = cast(VMap.lookup(ParentBB));

1295 for (auto *SuccBB : successors(ParentBB)) {

1296 if (SuccBB == UnswitchedSuccBB)

1297 continue;

1298

1299 auto *ClonedSuccBB = cast_or_null(VMap.lookup(SuccBB));

1300 if (!ClonedSuccBB)

1301 continue;

1302

1303 ClonedSuccBB->removePredecessor(ClonedParentBB,

1304 true);

1305 }

1306

1307

1308

1309 auto *ClonedSuccBB = cast(VMap.lookup(UnswitchedSuccBB));

1310 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();

1311

1312

1313 Value *ClonedConditionToErase = nullptr;

1314 if (auto *BI = dyn_cast(ClonedTerminator))

1315 ClonedConditionToErase = BI->getCondition();

1316 else if (auto *SI = dyn_cast(ClonedTerminator))

1317 ClonedConditionToErase = SI->getCondition();

1318

1322

1323 if (ClonedConditionToErase)

1325 MSSAU);

1326

1327

1328

1329

1330 for (PHINode &PN : ClonedSuccBB->phis()) {

1331 bool Found = false;

1332

1333

1334 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {

1335 if (PN.getIncomingBlock(i) != ClonedParentBB)

1336 continue;

1337 if (!Found) {

1338 Found = true;

1339 continue;

1340 }

1341 PN.removeIncomingValue(i, false);

1342 }

1343 }

1344

1345

1347 for (auto *ClonedBB : NewBlocks) {

1348 for (auto *SuccBB : successors(ClonedBB))

1349 if (SuccSet.insert(SuccBB).second)

1350 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});

1351 SuccSet.clear();

1352 }

1353

1354 return ClonedPH;

1355}

1356

1357

1358

1359

1360

1361

1362

1365 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {

1366 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");

1367 ClonedL.reserveBlocks(OrigL.getNumBlocks());

1368 for (auto *BB : OrigL.blocks()) {

1369 auto *ClonedBB = cast(VMap.lookup(BB));

1370 ClonedL.addBlockEntry(ClonedBB);

1373 }

1374 };

1375

1376

1377

1379 if (RootParentL)

1381 else

1383 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);

1384

1386 return ClonedRootL;

1387

1388

1389

1390

1392

1393

1395 LoopsToClone.push_back({ClonedRootL, ChildL});

1396 do {

1397 Loop *ClonedParentL, *L;

1398 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();

1401 AddClonedBlocksToLoop(*L, *ClonedL);

1403 LoopsToClone.push_back({ClonedL, ChildL});

1404 } while (!LoopsToClone.empty());

1405

1406 return ClonedRootL;

1407}

1408

1409

1410

1411

1412

1413

1414

1415

1416

1417

1418

1419

1420

1421

1425 Loop *ClonedL = nullptr;

1426

1428 auto *OrigHeader = OrigL.getHeader();

1429

1430 auto *ClonedPH = cast(VMap.lookup(OrigPH));

1431 auto *ClonedHeader = cast(VMap.lookup(OrigHeader));

1432

1433

1434

1435

1436

1437 Loop *ParentL = nullptr;

1440 ClonedExitsInLoops.reserve(ExitBlocks.size());

1441 for (auto *ExitBB : ExitBlocks)

1442 if (auto *ClonedExitBB = cast_or_null(VMap.lookup(ExitBB)))

1444 ExitLoopMap[ClonedExitBB] = ExitL;

1445 ClonedExitsInLoops.push_back(ClonedExitBB);

1446 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))

1447 ParentL = ExitL;

1448 }

1451 "The computed parent loop should always contain (or be) the parent of "

1452 "the original loop.");

1453

1454

1455

1456

1457

1459 for (auto *BB : OrigL.blocks())

1460 if (auto *ClonedBB = cast_or_null(VMap.lookup(BB)))

1461 ClonedLoopBlocks.insert(ClonedBB);

1462

1463

1464

1465

1466

1469 for (auto *Pred : predecessors(ClonedHeader)) {

1470

1471

1472 if (Pred == ClonedPH)

1473 continue;

1474

1475

1476

1477 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "

1478 "header other than the preheader "

1479 "that is not part of the loop!");

1480

1481

1482

1483

1484 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)

1486 }

1487

1488

1489

1490

1491 if (!BlocksInClonedLoop.empty()) {

1492 BlocksInClonedLoop.insert(ClonedHeader);

1493

1494 while (!Worklist.empty()) {

1497 "Didn't put block into the loop set!");

1498

1499

1500

1501

1502

1503

1505 if (ClonedLoopBlocks.count(Pred) &&

1506 BlocksInClonedLoop.insert(Pred).second)

1508 }

1509

1511 if (ParentL) {

1514 } else {

1516 }

1517 NonChildClonedLoops.push_back(ClonedL);

1518

1520

1521

1522

1523

1524

1525 for (auto *BB : OrigL.blocks()) {

1526 auto *ClonedBB = cast_or_null(VMap.lookup(BB));

1527 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))

1528 continue;

1529

1530

1533 continue;

1534 }

1535

1536

1537

1538

1539 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())

1540 PL->addBlockEntry(ClonedBB);

1541 }

1542

1543

1544

1545

1546

1547 for (Loop *ChildL : OrigL) {

1548 auto *ClonedChildHeader =

1549 cast_or_null(VMap.lookup(ChildL->getHeader()));

1550 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))

1551 continue;

1552

1553#ifndef NDEBUG

1554

1555

1556 for (auto *ChildLoopBB : ChildL->blocks())

1558 cast(VMap.lookup(ChildLoopBB))) &&

1559 "Child cloned loop has a header within the cloned outer "

1560 "loop but not all of its blocks!");

1561#endif

1562

1564 }

1565 }

1566

1567

1568

1569

1570

1571

1572

1573

1575 if (BlocksInClonedLoop.empty())

1576 UnloopedBlockSet.insert(ClonedPH);

1577 for (auto *ClonedBB : ClonedLoopBlocks)

1578 if (!BlocksInClonedLoop.count(ClonedBB))

1579 UnloopedBlockSet.insert(ClonedBB);

1580

1581

1582

1583

1584

1585 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;

1587 return ExitLoopMap.lookup(LHS)->getLoopDepth() <

1588 ExitLoopMap.lookup(RHS)->getLoopDepth();

1589 });

1590

1591

1592

1593 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {

1594 assert(Worklist.empty() && "Didn't clear worklist!");

1595

1596 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();

1597 Loop *ExitL = ExitLoopMap.lookup(ExitBB);

1598

1599

1600

1602 do {

1604

1605 if (BB == ClonedPH)

1606 continue;

1607

1609

1610

1611 if (!UnloopedBlockSet.erase(PredBB)) {

1613 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&

1614 "Predecessor not mapped to a loop!");

1615 continue;

1616 }

1617

1618

1619

1620

1621 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;

1622 (void)Inserted;

1623 assert(Inserted && "Should only visit an unlooped block once!");

1624

1625

1627 }

1628 } while (!Worklist.empty());

1629 }

1630

1631

1632

1633

1634

1635

1636

1637 for (auto *BB : llvm::concat<BasicBlock *const>(

1638 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))

1639 if (Loop *OuterL = ExitLoopMap.lookup(BB))

1640 OuterL->addBasicBlockToLoop(BB, LI);

1641

1642#ifndef NDEBUG

1643 for (auto &BBAndL : ExitLoopMap) {

1644 auto *BB = BBAndL.first;

1645 auto *OuterL = BBAndL.second;

1647 "Failed to put all blocks into outer loops!");

1648 }

1649#endif

1650

1651

1652

1653

1654 for (Loop *ChildL : OrigL) {

1655 auto *ClonedChildHeader =

1656 cast_or_null(VMap.lookup(ChildL->getHeader()));

1657 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))

1658 continue;

1659

1660#ifndef NDEBUG

1661 for (auto *ChildLoopBB : ChildL->blocks())

1663 "Cloned a child loop header but not all of that loops blocks!");

1664#endif

1665

1667 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));

1668 }

1669}

1670

1671static void

1673 ArrayRef<std::unique_ptr> VMaps,

1675

1677 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))

1678 for (const auto &VMap : VMaps)

1679 if (BasicBlock *ClonedBB = cast_or_null(VMap->lookup(BB)))

1682 SuccBB->removePredecessor(ClonedBB);

1684 }

1685

1686

1687 if (MSSAU) {

1689 DeadBlocks.end());

1691 }

1692

1693

1695 BB->dropAllReferences();

1696

1698 BB->eraseFromParent();

1699}

1700

1707

1708

1710

1711

1712

1714 ExitBlocks.end());

1715 DeathCandidates.append(L.blocks().begin(), L.blocks().end());

1716 while (!DeathCandidates.empty()) {

1720 SuccBB->removePredecessor(BB);

1721 DeathCandidates.push_back(SuccBB);

1722 }

1723 DeadBlockSet.insert(BB);

1724 }

1725 }

1726

1727

1728 if (MSSAU)

1730

1731

1732

1734 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });

1735

1736

1737 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {

1738 for (auto *BB : DeadBlockSet)

1739 ParentL->getBlocksSet().erase(BB);

1741 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });

1742 }

1743

1744

1745

1747 if (!DeadBlockSet.count(ChildL->getHeader()))

1748 return false;

1749

1750 assert(llvm::all_of(ChildL->blocks(),

1751 [&](BasicBlock *ChildBB) {

1752 return DeadBlockSet.count(ChildBB);

1753 }) &&

1754 "If the child loop header is dead all blocks in the child loop must "

1755 "be dead as well!");

1757 if (SE)

1760 return true;

1761 });

1762

1763

1764

1765

1766 for (auto *BB : DeadBlockSet) {

1767

1768 assert(!DT.getNode(BB) && "Should already have cleared domtree!");

1769 LI.changeLoopFor(BB, nullptr);

1770

1771

1772 for (auto &I : *BB)

1773 if (I.use_empty())

1775 BB->dropAllReferences();

1776 }

1777

1778

1779

1780 for (auto *BB : DeadBlockSet)

1781 BB->eraseFromParent();

1782}

1783

1784

1785

1786

1787

1788

1789

1790

1791

1792

1793

1794

1798

1799 auto *PH = L.getLoopPreheader();

1800 auto *Header = L.getHeader();

1801

1802

1804

1805

1806

1808

1809 if (Pred == PH)

1810 continue;

1811

1812

1813

1814 assert(L.contains(Pred) && "Found a predecessor of the loop header other "

1815 "than the preheader that is not part of the "

1816 "loop!");

1817

1818

1819

1820

1821 if (LoopBlockSet.insert(Pred).second && Pred != Header)

1823 }

1824

1825

1826 if (LoopBlockSet.empty())

1827 return LoopBlockSet;

1828

1829

1830 while (!Worklist.empty()) {

1832 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");

1833

1834

1835 if (BB == Header)

1836 continue;

1837

1838

1839

1840

1841

1843 if (InnerL != &L) {

1844 assert(L.contains(InnerL) &&

1845 "Should not reach a loop *outside* this loop!");

1846

1847

1848 auto *InnerPH = InnerL->getLoopPreheader();

1849 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "

1850 "but not contain the inner loop "

1851 "preheader!");

1852 if (!LoopBlockSet.insert(InnerPH).second)

1853

1854

1855 continue;

1856

1857

1858

1859

1860

1861

1862 for (auto *InnerBB : InnerL->blocks()) {

1863 if (InnerBB == BB) {

1865 "Block should already be in the set!");

1866 continue;

1867 }

1868

1869 LoopBlockSet.insert(InnerBB);

1870 }

1871

1872

1873

1875 continue;

1876 }

1877

1878

1879

1881 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)

1883 }

1884

1885 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");

1886

1887

1888

1889 return LoopBlockSet;

1890}

1891

1892

1893

1894

1895

1896

1897

1898

1899

1900

1901

1902

1903

1904

1905

1910 auto *PH = L.getLoopPreheader();

1911

1912

1913

1914 Loop *ParentL = nullptr;

1917 ExitsInLoops.reserve(ExitBlocks.size());

1918 for (auto *ExitBB : ExitBlocks)

1922 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))

1923 ParentL = ExitL;

1924 }

1925

1926

1927

1929

1930

1931

1932

1933

1934 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {

1935

1936 for (Loop *IL = L.getParentLoop(); IL != ParentL;

1938 IL->getBlocksSet().erase(PH);

1939 for (auto *BB : L.blocks())

1940 IL->getBlocksSet().erase(BB);

1942 return BB == PH || L.contains(BB);

1943 });

1944 }

1945

1947 L.getParentLoop()->removeChildLoop(&L);

1948 if (ParentL)

1950 else

1952 }

1953

1954

1955 auto &Blocks = L.getBlocksVector();

1956 auto BlocksSplitI =

1957 LoopBlockSet.empty()

1959 : std::stable_partition(

1961 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });

1962

1963

1965 if (LoopBlockSet.empty())

1966 UnloopedBlocks.insert(PH);

1967

1968

1970 L.getBlocksSet().erase(BB);

1972

1973

1974

1977 });

1978

1979

1981 Loop *PrevExitL = L.getParentLoop();

1982

1983 auto RemoveUnloopedBlocksFromLoop =

1985 for (auto *BB : UnloopedBlocks)

1986 L.getBlocksSet().erase(BB);

1988 return UnloopedBlocks.count(BB);

1989 });

1990 };

1991

1993 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {

1994 assert(Worklist.empty() && "Didn't clear worklist!");

1995 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");

1996

1997

2000 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");

2001

2002

2003

2004

2005

2006 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())

2007 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);

2008

2009

2010

2012 do {

2014

2015 if (BB == PH)

2016 continue;

2017

2019

2020

2021 if (!UnloopedBlocks.erase(PredBB)) {

2022 assert((NewExitLoopBlocks.count(PredBB) ||

2024 "Predecessor not in a nested loop (or already visited)!");

2025 continue;

2026 }

2027

2028

2029

2030

2031 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;

2032 (void)Inserted;

2033 assert(Inserted && "Should only visit an unlooped block once!");

2034

2035

2037 }

2038 } while (!Worklist.empty());

2039

2040

2041

2042

2043 for (auto *BB : NewExitLoopBlocks)

2045 if (BBL == &L || !L.contains(BBL))

2047

2048

2049

2050 NewExitLoopBlocks.clear();

2051 }

2052

2053

2054

2055 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())

2056 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);

2057 for (auto *BB : UnloopedBlocks)

2059 if (BBL == &L || !L.contains(BBL))

2061

2062

2063

2064

2065 auto &SubLoops = L.getSubLoopsVector();

2066 auto SubLoopsSplitI =

2067 LoopBlockSet.empty()

2068 ? SubLoops.begin()

2069 : std::stable_partition(

2070 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {

2071 return LoopBlockSet.count(SubL->getHeader());

2072 });

2073 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {

2074 HoistedLoops.push_back(HoistedL);

2075 HoistedL->setParentLoop(nullptr);

2076

2077

2078

2079

2080

2081

2082

2083

2084

2085 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))

2086 NewParentL->addChildLoop(HoistedL);

2087 else

2089 }

2090 SubLoops.erase(SubLoopsSplitI, SubLoops.end());

2091

2092

2093 if (Blocks.empty()) {

2094 assert(SubLoops.empty() &&

2095 "Failed to remove all subloops from the original loop!");

2096 if (Loop *ParentL = L.getParentLoop())

2098 else

2100

2101

2102 if (SE)

2105 return false;

2106 }

2107

2108 return true;

2109}

2110

2111

2112

2113

2114template

2118#ifndef NDEBUG

2120 Visited.insert(DT[BB]);

2121#endif

2122 do {

2124

2125

2126 if (!Callable(N->getBlock()))

2127 continue;

2128

2129

2132 "Cannot visit a node twice when walking a tree!");

2134 }

2135 } while (!DomWorklist.empty());

2136}

2137

2139 bool CurrentLoopValid, bool PartiallyInvariant,

2141

2142 if (!NewLoops.empty())

2143 U.addSiblingLoops(NewLoops);

2144

2145

2146

2147 if (CurrentLoopValid) {

2148 if (PartiallyInvariant) {

2149

2150

2151 auto &Context = L.getHeader()->getContext();

2153 Context,

2154 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));

2156 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},

2157 {DisableUnswitchMD});

2158 L.setLoopID(NewLoopID);

2159 } else if (InjectedCondition) {

2160

2161 auto &Context = L.getHeader()->getContext();

2163 Context,

2164 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));

2166 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},

2167 {DisableUnswitchMD});

2168 L.setLoopID(NewLoopID);

2169 } else

2170 U.revisitCurrentLoop();

2171 } else

2172 U.markLoopAsDeleted(L, LoopName);

2173}

2174

2179 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {

2180 auto *ParentBB = TI.getParent();

2181 BranchInst *BI = dyn_cast(&TI);

2182 SwitchInst *SI = BI ? nullptr : cast(&TI);

2183

2184

2185

2186 std::string LoopName(L.getName());

2187

2188

2189

2190

2192 "Can only unswitch switches and conditional branch!");

2193 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();

2194 bool FullUnswitch =

2196 !PartiallyInvariant);

2197 if (FullUnswitch)

2199 "Cannot have other invariants with full unswitching!");

2200 else

2202 "Partial unswitching requires an instruction as the condition!");

2203

2206

2207

2208

2209

2210

2211

2212

2214 int ClonedSucc = 0;

2215 if (!FullUnswitch) {

2219 PartiallyInvariant) &&

2220 "Only `or`, `and`, an `select`, partially invariant instructions "

2221 "can combine invariants being unswitched.");

2226 ClonedSucc = 1;

2227 }

2228 }

2229 }

2230

2232 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();

2234 if (BI)

2236 else

2237 for (auto Case : SI->cases())

2238 if (Case.getCaseSuccessor() != RetainedSuccBB)

2239 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());

2240

2241 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&

2242 "Should not unswitch the same successor we are retaining!");

2243

2244

2245

2246

2247

2248 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");

2249

2250

2251 Loop *ParentL = L.getParentLoop();

2252

2254 if (MSSAU)

2256

2257

2258

2259

2260 Loop *OuterExitL = &L;

2262 L.getUniqueExitBlocks(ExitBlocks);

2263 for (auto *ExitBB : ExitBlocks) {

2264

2265

2267 if (!NewOuterExitL) {

2268

2269 OuterExitL = nullptr;

2270 break;

2271 }

2272 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))

2273 OuterExitL = NewOuterExitL;

2274 }

2275

2276

2277

2278

2279 if (SE) {

2280 if (OuterExitL)

2282 else

2285 }

2286

2287

2288

2289

2290

2291

2293 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),

2294 UnswitchedSuccBBs))

2295 if (SuccBB->getUniquePredecessor() ||

2297 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);

2298 }))

2300 DominatingSucc[BB] = SuccBB;

2301 return true;

2302 });

2303

2304

2305

2306

2307

2308

2309 BasicBlock *SplitBB = L.getLoopPreheader();

2310 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);

2311

2312

2314

2315

2317 VMaps.reserve(UnswitchedSuccBBs.size());

2319 for (auto *SuccBB : UnswitchedSuccBBs) {

2322 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,

2323 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);

2324 }

2325

2326

2327

2328 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {

2330

2331

2332 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);

2333 else {

2334

2335

2339 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);

2340 }

2341 }

2342

2343

2344

2345

2346 SplitBB->getTerminator()->eraseFromParent();

2347 if (FullUnswitch) {

2348

2350 NewTI->insertInto(ParentBB, ParentBB->end());

2351

2352

2353

2354 TI.moveBefore(*SplitBB, SplitBB->end());

2356

2357

2358 if (BI) {

2363 if (InsertFreeze) {

2364

2365

2366

2368 }

2370 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});

2371 } else {

2372 assert(SI && "Must either be a branch or switch!");

2373

2374

2375 assert(SI->getDefaultDest() == RetainedSuccBB &&

2376 "Not retaining default successor!");

2377 SI->setDefaultDest(LoopPH);

2378 for (const auto &Case : SI->cases())

2379 if (Case.getCaseSuccessor() == RetainedSuccBB)

2380 Case.setSuccessor(LoopPH);

2381 else

2382 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);

2383

2384 if (InsertFreeze)

2385 SI->setCondition(new FreezeInst(SI->getCondition(),

2386 SI->getCondition()->getName() + ".fr",

2387 SI->getIterator()));

2388

2389

2390

2391

2392 for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2394 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});

2395 }

2396

2397 if (MSSAU) {

2399 DTUpdates.clear();

2400

2401

2402

2403

2405 for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2407

2408 for (auto &VMap : VMaps)

2410 true);

2412

2413

2414 for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2415 MSSAU->removeEdge(ParentBB, SuccBB);

2416 }

2417

2418

2419

2420

2421 if (BI) {

2422

2423 assert(UnswitchedSuccBBs.size() == 1 &&

2424 "Only one possible unswitched block for a branch!");

2425 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();

2427 true);

2428 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});

2429 } else {

2430

2431

2432

2433

2434

2435

2436 SwitchInst *NewSI = cast(NewTI);

2438 "Not retaining default successor!");

2439 for (const auto &Case : NewSI->cases())

2440 Case.getCaseSuccessor()->removePredecessor(

2441 ParentBB,

2442 true);

2443

2444

2445

2446

2447 for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2448 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});

2449 }

2450

2451

2452

2455

2456

2458 } else {

2459 assert(BI && "Only branches have partial unswitching.");

2460 assert(UnswitchedSuccBBs.size() == 1 &&

2461 "Only one possible unswitched block for a branch!");

2463

2464

2465 if (PartiallyInvariant)

2467 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);

2468 else {

2470 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,

2472 }

2473 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});

2474

2475 if (MSSAU) {

2477 DTUpdates.clear();

2478

2479

2480 for (auto &VMap : VMaps)

2482 true);

2484 }

2485 }

2486

2487

2489

2490

2491

2492

2493

2495

2496

2497

2498

2500 for (std::unique_ptr &VMap : VMaps)

2501 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);

2502

2503

2504

2505

2507

2510

2512 bool IsStillLoop =

2514

2517

2518

2519

2520

2521

2522

2523 assert(DT.verify(DominatorTree::VerificationLevel::Fast));

2524

2525 if (BI && !PartiallyInvariant) {

2526

2527

2528

2529

2530 assert(UnswitchedSuccBBs.size() == 1 &&

2531 "Only one possible unswitched block for a branch!");

2533

2534

2535

2536

2537

2538

2539

2540

2541

2542

2543 bool ReplaceUnswitched =

2544 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;

2545

2552 for (Value *Invariant : Invariants) {

2553 assert(!isa(Invariant) &&

2554 "Should not be replacing constant values!");

2555

2557 Instruction *UserI = dyn_cast(U.getUser());

2558 if (!UserI)

2559 continue;

2560

2561

2562

2564 U.set(ContinueReplacement);

2565 else if (ReplaceUnswitched &&

2567 U.set(UnswitchedReplacement);

2568 }

2569 }

2570 }

2571

2572

2573

2574

2575

2576

2577

2578

2579

2580

2581

2582

2583

2584 auto UpdateLoop = [&](Loop &UpdateL) {

2585#ifndef NDEBUG

2586 UpdateL.verifyLoop();

2587 for (Loop *ChildL : UpdateL) {

2588 ChildL->verifyLoop();

2589 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&

2590 "Perturbed a child loop's LCSSA form!");

2591 }

2592#endif

2593

2594

2595

2596 formLCSSA(UpdateL, DT, &LI, SE);

2597

2598

2599

2600

2601

2603 };

2604

2605

2606

2607

2608

2609

2610 for (Loop *UpdatedL :

2611 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {

2612 UpdateLoop(*UpdatedL);

2613 if (UpdatedL->isOutermost())

2614 OuterExitL = nullptr;

2615 }

2616 if (IsStillLoop) {

2617 UpdateLoop(L);

2618 if (L.isOutermost())

2619 OuterExitL = nullptr;

2620 }

2621

2622

2623

2624 if (OuterExitL != &L)

2625 for (Loop *OuterL = ParentL; OuterL != OuterExitL;

2627 UpdateLoop(*OuterL);

2628

2629#ifndef NDEBUG

2630

2631

2633#endif

2634

2635

2636

2637

2639 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))

2640 if (UpdatedL->getParentLoop() == ParentL)

2642 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,

2643 InjectedCondition, SibLoops);

2644

2647

2648 if (BI)

2649 ++NumBranches;

2650 else

2651 ++NumSwitches;

2652}

2653

2654

2655

2656

2657

2658

2659

2664

2665

2666 auto BBCostIt = BBCostMap.find(N.getBlock());

2667 if (BBCostIt == BBCostMap.end())

2668 return 0;

2669

2670

2671 auto DTCostIt = DTCostMap.find(&N);

2672 if (DTCostIt != DTCostMap.end())

2673 return DTCostIt->second;

2674

2675

2676

2678 N.begin(), N.end(), BBCostIt->second,

2680 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);

2681 });

2682 bool Inserted = DTCostMap.insert({&N, Cost}).second;

2683 (void)Inserted;

2684 assert(Inserted && "Should not insert a node while visiting children!");

2685 return Cost;

2686}

2687

2688

2689

2690

2691

2692

2693

2694

2695

2696

2697

2698

2699

2700

2701

2702

2703

2704

2705

2706

2707

2708

2709

2710

2714 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");

2715 BasicBlock *HeadBB = SI->getParent();

2716

2717 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

2719 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);

2720 auto *CondBr = cast(HeadBB->getTerminator());

2721 BasicBlock *ThenBB = CondBr->getSuccessor(0),

2722 *TailBB = CondBr->getSuccessor(1);

2723 if (MSSAU)

2725

2727 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());

2728 Phi->addIncoming(SI->getTrueValue(), ThenBB);

2729 Phi->addIncoming(SI->getFalseValue(), HeadBB);

2730 Phi->setDebugLoc(SI->getDebugLoc());

2731 SI->replaceAllUsesWith(Phi);

2732 SI->eraseFromParent();

2733

2736

2737 ++NumSelects;

2738 return CondBr;

2739}

2740

2741

2742

2743

2744

2745

2746

2747

2748

2749

2750

2751

2752

2753

2754

2755

2756

2757

2758

2759

2760

2761

2766 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");

2768

2771

2772 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

2775 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);

2777

2778

2780

2782 GuardedBlock->setName("guarded");

2785

2786 if (MSSAU)

2788

2791

2792 if (MSSAU) {

2797 }

2798

2801 ++NumGuards;

2802 return CheckBI;

2803}

2804

2805

2806

2807

2808

2809

2810

2811

2812

2813

2814

2815

2816

2817

2822

2823

2824

2825

2826

2827 const BasicBlock *Latch = L.getLoopLatch();

2829 if (DT.dominates(CondBlock, Latch) &&

2833 return L.contains(SuccBB);

2834 }) <= 1))) {

2835 NumCostMultiplierSkipped++;

2836 return 1;

2837 }

2838

2839 auto *ParentL = L.getParentLoop();

2840 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()

2841 : std::distance(LI.begin(), LI.end()));

2842

2843

2844

2845 int UnswitchedClones = 0;

2846 for (const auto &Candidate : UnswitchCandidates) {

2849 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);

2850 if (isa(CI)) {

2851 UnswitchedClones++;

2852 continue;

2853 }

2855 if (!SkipExitingSuccessors)

2856 UnswitchedClones++;

2857 continue;

2858 }

2859 int NonExitingSuccessors =

2861 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {

2862 return !SkipExitingSuccessors || L.contains(SuccBB);

2863 });

2864 UnswitchedClones += Log2_32(NonExitingSuccessors);

2865 }

2866

2867

2868

2869

2870

2871

2872 unsigned ClonesPower =

2874

2875

2876 int SiblingsMultiplier =

2877 std::max((ParentL ? SiblingsCount

2879 1);

2880

2881

2882 int CostMultiplier;

2886 else

2887 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),

2889

2890 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier

2891 << " (siblings " << SiblingsMultiplier << " * clones "

2892 << (1 << ClonesPower) << ")"

2893 << " for unswitch candidate: " << TI << "\n");

2894 return CostMultiplier;

2895}

2896

2902 assert(UnswitchCandidates.empty() && "Should be!");

2903

2906 if (isa(Cond))

2907 return;

2908 if (L.isLoopInvariant(Cond)) {

2910 return;

2911 }

2916 if (!Invariants.empty())

2917 UnswitchCandidates.push_back({I, std::move(Invariants)});

2918 }

2919 };

2920

2921

2922 bool CollectGuards = false;

2925 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);

2926 if (GuardDecl && !GuardDecl->use_empty())

2927 CollectGuards = true;

2928 }

2929

2930 for (auto *BB : L.blocks()) {

2932 continue;

2933

2934 for (auto &I : *BB) {

2935 if (auto *SI = dyn_cast(&I)) {

2936 auto *Cond = SI->getCondition();

2937

2938 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))

2939 AddUnswitchCandidatesForInst(SI, Cond);

2940 } else if (CollectGuards && isGuard(&I)) {

2941 auto *Cond =

2943

2944 if (!isa(Cond) && L.isLoopInvariant(Cond))

2946 }

2947 }

2948

2949 if (auto *SI = dyn_cast(BB->getTerminator())) {

2950

2951

2952 if (!isa(SI->getCondition()) &&

2953 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())

2954 UnswitchCandidates.push_back({SI, {SI->getCondition()}});

2955 continue;

2956 }

2957

2958 auto *BI = dyn_cast(BB->getTerminator());

2959 if (!BI || !BI->isConditional() ||

2960 BI->getSuccessor(0) == BI->getSuccessor(1))

2961 continue;

2962

2963 AddUnswitchCandidatesForInst(BI, BI->getCondition());

2964 }

2965

2966 if (MSSAU && findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&

2967 any\_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {

2968 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();

2969 })) {

2973 dbgs() << "simple-loop-unswitch: Found partially invariant condition "

2974 << *Info->InstToDuplicate[0] << "\n");

2975 PartialIVInfo = *Info;

2976 PartialIVCondBranch = L.getHeader()->getTerminator();

2980 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});

2981 }

2982 }

2983 return !UnswitchCandidates.empty();

2984}

2985

2986

2987

2988

2989

2990

2991

2992

2997 const Loop &L) {

2998 if (!L.contains(IfTrue)) {

2999 Pred = ICmpInst::getInversePredicate(Pred);

3001 }

3002

3003

3004 if (L.isLoopInvariant(LHS)) {

3005 Pred = ICmpInst::getSwappedPredicate(Pred);

3007 }

3008

3009 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {

3010

3011 Pred = ICmpInst::ICMP_ULT;

3012 RHS = ConstantInt::get(

3015 }

3016}

3017

3018

3019

3020

3024 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))

3025 return false;

3026

3027 if (Pred != ICmpInst::ICMP_ULT)

3028 return false;

3029

3030 if (!L.contains(IfTrue) || L.contains(IfFalse))

3031 return false;

3032

3033

3034 if (L.getHeader() == IfTrue)

3035 return false;

3036 return true;

3037}

3038

3039

3040

3041

3042

3047 return false;

3050

3051 assert(Weights.size() == 2 && "Unexpected profile data!");

3053 auto Num = Weights[Idx];

3054 auto Denom = Weights[0] + Weights[1];

3055

3056 if (Denom == 0 || Num > Denom)

3057 return false;

3059 if (LikelyTaken > ActualTaken)

3060 return false;

3061 return true;

3062}

3063

3064

3065

3066

3067

3068

3069

3070

3071

3072

3073

3074

3075

3076

3077

3078

3079

3080

3081

3082static NonTrivialUnswitchCandidate

3086 assert(Candidate.hasPendingInjection() && "Nothing to inject!");

3087 BasicBlock *Preheader = L.getLoopPreheader();

3088 assert(Preheader && "Loop is not in simplified form?");

3090 "Unswitching branch of inner loop!");

3091

3092 auto Pred = Candidate.PendingInjection->Pred;

3093 auto *LHS = Candidate.PendingInjection->LHS;

3094 auto *RHS = Candidate.PendingInjection->RHS;

3095 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;

3096 auto *TI = cast(Candidate.TI);

3097 auto *BB = Candidate.TI->getParent();

3098 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)

3099 : TI->getSuccessor(0);

3100

3101 assert(L.contains(InLoopSucc) && "Not supported yet!");

3102 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");

3103 auto &Ctx = BB->getContext();

3104

3106 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");

3111 else

3113 }

3114

3115

3116 auto *InjectedCond =

3117 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",

3119

3121 BB->getParent(), InLoopSucc);

3123 auto *InvariantBr =

3124 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);

3125

3127 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),

3128 TI->getSuccessor(1));

3130

3131

3132 for (auto &I : *InLoopSucc) {

3133 auto *PN = dyn_cast(&I);

3134 if (!PN)

3135 break;

3136 auto *Inc = PN->getIncomingValueForBlock(BB);

3137 PN->addIncoming(Inc, CheckBlock);

3138 }

3139 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);

3140

3142 { DominatorTree::Insert, BB, CheckBlock },

3143 { DominatorTree::Insert, CheckBlock, InLoopSucc },

3144 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },

3145 { DominatorTree::Delete, BB, OutOfLoopSucc }

3146 };

3147

3149 if (MSSAU)

3151 L.addBasicBlockToLoop(CheckBlock, LI);

3152

3153#ifndef NDEBUG

3158#endif

3159

3160

3161

3162

3163 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr

3164 << " and considering it for unswitching.");

3165 ++NumInvariantConditionsInjected;

3166 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },

3167 Candidate.Cost);

3168}

3169

3170

3171

3172

3173

3174

3175

3176

3177

3178

3179

3180

3185

3187 assert(ICmpInst::isStrictPredicate(Pred));

3188 if (Compares.size() < 2)

3189 return false;

3190 ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);

3191 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;

3192 Next != Compares.end(); ++Prev, ++Next) {

3193 Value *LHS = Next->Invariant;

3194 Value *RHS = Prev->Invariant;

3195 BasicBlock *InLoopSucc = Prev->InLoopSucc;

3196 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);

3197 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },

3198 std::nullopt, std::move(ToInject));

3199 UnswitchCandidates.push_back(std::move(Candidate));

3200 }

3201 return true;

3202}

3203

3204

3205

3206

3207

3208

3209

3210

3211

3212

3213

3214

3215

3216

3217

3218

3225 return false;

3226

3228 return false;

3229 auto *Latch = L.getLoopLatch();

3230

3231 if (!Latch)

3232 return false;

3233 assert(L.getLoopPreheader() && "Must have a preheader!");

3234

3236

3237

3238 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());

3239 DTN = DTN->getIDom()) {

3242 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;

3243 auto *BB = DTN->getBlock();

3244

3246 continue;

3247 auto *Term = BB->getTerminator();

3250 continue;

3252 continue;

3254 L);

3256 continue;

3258 continue;

3259

3260

3261 CompareDesc Desc(cast(Term), RHS, IfTrue);

3262 while (auto *Zext = dyn_cast(LHS))

3263 LHS = Zext->getOperand(0);

3264 CandidatesULT[LHS].push_back(Desc);

3265 }

3266

3267 bool Found = false;

3268 for (auto &It : CandidatesULT)

3270 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);

3271 return Found;

3272}

3273

3275 if (!L.isSafeToClone())

3276 return false;

3277 for (auto *BB : L.blocks())

3278 for (auto &I : *BB) {

3279 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))

3280 return false;

3281 if (auto *CB = dyn_cast(&I)) {

3282 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");

3283 if (CB->isConvergent())

3284 return false;

3285 }

3286 }

3287

3288

3289

3290

3291

3292

3293

3296 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))

3297 return false;

3298

3300 L.getUniqueExitBlocks(ExitBlocks);

3301

3302

3303

3304

3305 for (auto *ExitBB : ExitBlocks) {

3306 auto *I = ExitBB->getFirstNonPHI();

3307 if (isa(I) || isa(I)) {

3308 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "

3309 "in exit block\n");

3310 return false;

3311 }

3312 }

3313

3314 return true;

3315}

3316

3321

3322

3323

3324

3325

3329

3330

3331

3332

3333

3334

3336 L.getHeader()->getParent()->hasMinSize()

3340 for (auto *BB : L.blocks()) {

3342 for (auto &I : *BB) {

3343 if (EphValues.count(&I))

3344 continue;

3346 }

3347 assert(Cost >= 0 && "Must not have negative costs!");

3348 LoopCost += Cost;

3349 assert(LoopCost >= 0 && "Must not have negative loop costs!");

3350 BBCostMap[BB] = Cost;

3351 }

3352 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");

3353

3354

3355

3356

3357

3358

3359

3360

3361

3362

3363

3364

3365

3366

3367

3369

3370

3371 auto ComputeUnswitchedCost = [&](Instruction &TI,

3373

3374 if (isa(TI))

3375 return LoopCost;

3376

3379

3382

3383 if (!Visited.insert(SuccBB).second)

3384 continue;

3385

3386

3387

3388

3389

3390

3391 if (!FullUnswitch) {

3392 auto &BI = cast(TI);

3395 if (SuccBB == BI.getSuccessor(1))

3396 continue;

3398 if (SuccBB == BI.getSuccessor(0))

3399 continue;

3401 SuccBB == BI.getSuccessor(0)) ||

3403 SuccBB == BI.getSuccessor(1)))

3404 continue;

3405 }

3406

3407

3408

3409

3410

3411 if (SuccBB->getUniquePredecessor() ||

3413 return PredBB == &BB || DT.dominates(SuccBB, PredBB);

3414 })) {

3417 "Non-duplicated cost should never exceed total loop cost!");

3418 }

3419 }

3420

3421

3422

3423

3424

3425

3426 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();

3427 assert(SuccessorsCount > 1 &&

3428 "Cannot unswitch a condition without multiple distinct successors!");

3429 return (LoopCost - Cost) * (SuccessorsCount - 1);

3430 };

3431

3432 std::optional Best;

3433 for (auto &Candidate : UnswitchCandidates) {

3436 BranchInst *BI = dyn_cast(&TI);

3437 bool FullUnswitch =

3438 !BI || Candidate.hasPendingInjection() ||

3439 (Invariants.size() == 1 &&

3441 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);

3442

3443

3445 int CostMultiplier =

3449 "cost multiplier needs to be in the range of 1..UnswitchThreshold");

3450 CandidateCost *= CostMultiplier;

3451 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost

3452 << " (multiplier: " << CostMultiplier << ")"

3453 << " for unswitch candidate: " << TI << "\n");

3454 } else {

3455 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost

3456 << " for unswitch candidate: " << TI << "\n");

3457 }

3458

3459 if (!Best || CandidateCost < Best->Cost) {

3460 Best = Candidate;

3461 Best->Cost = CandidateCost;

3462 }

3463 }

3464 assert(Best && "Must be!");

3465 return *Best;

3466}

3467

3468

3469

3470

3471

3472

3473

3476 assert(isa(TI) || isa(TI));

3478 return false;

3479

3483 return false;

3484

3486 if (BranchInst *BI = dyn_cast(&TI))

3488 else

3491 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);

3492}

3493

3499

3500

3503 Instruction *PartialIVCondBranch = nullptr;

3505 PartialIVCondBranch, L, LI, AA, MSSAU);

3508 PartialIVCondBranch, L, DT, LI, AA,

3509 MSSAU);

3510

3511 if (UnswitchCandidates.empty())

3512 return false;

3513

3515 dbgs() << "Considering " << UnswitchCandidates.size()

3516 << " non-trivial loop invariant conditions for unswitching.\n");

3517

3519 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);

3520

3521 assert(Best.TI && "Failed to find loop unswitch candidate");

3522 assert(Best.Cost && "Failed to compute cost");

3523

3525 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost

3526 << "\n");

3527 return false;

3528 }

3529

3530 bool InjectedCondition = false;

3531 if (Best.hasPendingInjection()) {

3533 InjectedCondition = true;

3534 }

3535 assert(!Best.hasPendingInjection() &&

3536 "All injections should have been done by now!");

3537

3538 if (Best.TI != PartialIVCondBranch)

3540

3541 bool InsertFreeze;

3542 if (auto *SI = dyn_cast(Best.TI)) {

3543

3544

3545

3546

3548 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);

3550 } else {

3551

3553 Best.TI =

3556 }

3557

3558 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost

3559 << ") terminator: " << *Best.TI << "\n");

3561 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,

3562 InjectedCondition);

3563 return true;

3564}

3565

3566

3567

3568

3569

3570

3571

3572

3573

3574

3575

3576

3577

3578

3579

3580

3581

3582

3583

3584

3585

3586

3593 assert(L.isRecursivelyLCSSAForm(DT, LI) &&

3594 "Loops must be in LCSSA form before unswitching.");

3595

3596

3597 if (!L.isLoopSimplifyForm())

3598 return false;

3599

3600

3602

3603

3605 true, false,

3606 false, {});

3607 return true;

3608 }

3609

3610 const Function *F = L.getHeader()->getParent();

3611

3612

3613

3614

3615

3616

3617

3618

3619

3620

3621

3622

3623 bool ContinueWithNonTrivial =

3625 if (!ContinueWithNonTrivial)

3626 return false;

3627

3628

3629 if (F->hasOptSize())

3630 return false;

3631

3632

3633

3634 auto IsLoopNestCold = [&](const Loop *L) {

3635

3636 auto *Parent = L;

3637 while (Parent) {

3638 if (!PSI->isColdBlock(Parent->getHeader(), BFI))

3639 return false;

3640 Parent = Parent->getParentLoop();

3641 }

3642

3644 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),

3645 L->getSubLoops().end());

3646 while (!Worklist.empty()) {

3648 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))

3649 return false;

3650 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),

3651 CurLoop->getSubLoops().end());

3652 }

3653 return true;

3654 };

3655

3656

3657

3658 if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {

3659 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");

3660 return false;

3661 }

3662

3663

3665 return false;

3666

3667

3668

3669

3670

3671

3672

3673

3674

3676 return true;

3677

3678

3679 return false;

3680}

3681

3685 Function &F = *L.getHeader()->getParent();

3686 (void)F;

3688 if (auto OuterProxy =

3692 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L

3693 << "\n");

3694

3695 std::optional MSSAU;

3696 if (AR.MSSA) {

3700 }

3702 &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))

3704

3707

3708

3709

3710 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));

3711

3713 if (AR.MSSA)

3715 return PA;

3716}

3717

3721 OS, MapClassName2PassName);

3722

3723 OS << '<';

3724 OS << (NonTrivial ? "" : "no-") << "nontrivial;";

3725 OS << (Trivial ? "" : "no-") << "trivial";

3726 OS << '>';

3727}

Analysis containing CSE Info

This file contains the declarations for the subclasses of Constant, which represent the different fla...

static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

This file defines the DenseMap class.

DenseMap< Block *, BlockRelaxAux > Blocks

This file defines a set of templates that efficiently compute a dominator tree over a generic graph.

Module.h This file contains the declarations for the Module class.

This defines the Use class.

This file defines an InstructionCost class that is used when calculating the cost of an instruction,...

This header provides classes for managing per-loop analyses.

This header provides classes for managing a pipeline of passes over loops in LLVM IR.

This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...

uint64_t IntrinsicInst * II

This file contains the declarations for profiling metadata utility functions.

const SmallVectorImpl< MachineOperand > & Cond

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

Provides some synthesis utilities to produce sequences of values.

This file implements a set that has insertion order iteration characteristics.

static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)

Rewrite the PHI nodes in an unswitched loop exit basic block.

static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)

Unswitch control flow predicated on loop invariant conditions.

static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)

This routine scans the loop to find a branch or switch which occurs before any side effects occur.

static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))

static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))

static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)

Recompute the set of blocks in a loop after unswitching.

static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)

Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.

static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)

Copy a set of loop invariant values, and conditionally branch on them.

static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)

Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...

static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)

void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)

Helper to visit a dominator subtree, invoking a callable on each node.

static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)

Turns a select instruction into implicit control flow branch, making the following replacement:

static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)

void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)

static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)

Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...

static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))

static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)

Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...

static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)

static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))

static Value * skipTrivialSelect(Value *Cond)

static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)

static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)

Collect unswitch candidates by invariant conditions that are not immediately present in the loop.

static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))

static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)

static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)

Unswitch a trivial branch if the condition is loop invariant.

static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)

static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))

static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))

static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)

Recursively compute the cost of a dominator subtree based on the per-block cost map provided.

static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)

static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))

static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)

static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)

Tries to canonicalize condition described by:

static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)

Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...

static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)

Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.

static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)

Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...

static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)

Materialize pending invariant condition of the given candidate into IR.

static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))

static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/ times or less."), cl::init(16))

static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)

Unswitch a trivial switch if the condition is loop invariant.

static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)

static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)

Rebuild a loop after unswitching removes some subset of blocks and edges.

static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)

static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)

Build the cloned blocks for an unswitched copy of the given loop.

static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)

bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)

Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...

static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)

Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...

static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)

Recursively clone the specified loop and all of its children.

static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)

Hoist the current loop up to the innermost loop containing a remaining exit.

static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)

Build the cloned loops of an original loop from unswitching.

This file defines the SmallPtrSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This pass exposes codegen information to IR-level passes.

static APInt getSignedMinValue(unsigned numBits)

Gets minimum signed value of APInt for a specific bit width.

A container for analyses that lazily runs them and caches their results.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

A cache of @llvm.assume calls within a function.

void registerAssumption(AssumeInst *CI)

Add an @llvm.assume intrinsic to this function's cache.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

iterator_range< const_phi_iterator > phis() const

Returns a range that iterates over the phis in the basic block.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

InstListType::iterator iterator

Instruction iterators...

void moveBefore(BasicBlock *MovePos)

Unlink this basic block from its current function and insert it into the function that MovePos lives ...

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)

Update PHI nodes in this BasicBlock before removal of predecessor Pred.

BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...

Conditional or Unconditional Branch instruction.

void setCondition(Value *V)

void swapSuccessors()

Swap the successors of this branch instruction.

bool isConditional() const

static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)

BasicBlock * getSuccessor(unsigned i) const

void setSuccessor(unsigned idx, BasicBlock *NewSucc)

Value * getCondition() const

Value * getArgOperand(unsigned i) const

void setArgOperand(unsigned i, Value *v)

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

This is the shared class of boolean and integer constants.

static ConstantInt * getTrue(LLVMContext &Context)

static ConstantInt * getFalse(LLVMContext &Context)

This is an important base class in LLVM.

bool isOneValue() const

Returns true if the value is one.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

bool verify(VerificationLevel VL=VerificationLevel::Full) const

verify - checks if the tree is correct.

void applyUpdates(ArrayRef< UpdateType > Updates)

Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...

void insertEdge(NodeT *From, NodeT *To)

Inform the dominator tree about a CFG edge insertion and update the tree.

static constexpr UpdateKind Delete

static constexpr UpdateKind Insert

void deleteEdge(NodeT *From, NodeT *To)

Inform the dominator tree about a CFG edge deletion and update the tree.

DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const

getNode - return the (Post)DominatorTree node for the specified basic block.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

bool isReachableFromEntry(const Use &U) const

Provide an overload for a Use.

bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

This class represents a freeze function that returns random concrete value if an operand is either a ...

This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...

bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override

Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...

void computeLoopSafetyInfo(const Loop *CurLoop) override

Computes safety information for a loop checks loop body & header for the possibility of may throw exc...

bool isRelational() const

Return true if the predicate is relational (not EQ or NE).

Value * CreateFreeze(Value *V, const Twine &Name="")

BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)

Create a conditional 'br Cond, TrueDest, FalseDest' instruction.

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

Instruction * clone() const

Create a copy of 'this' instruction that is identical in all ways except the following:

void dropLocation()

Drop the instruction's debug location.

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

MDNode * getMetadata(unsigned KindID) const

Get the metadata of given kind attached to this Instruction.

bool isTerminator() const

void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

void setDebugLoc(DebugLoc Loc)

Set the debug location information for this instruction.

void moveBefore(Instruction *MovePos)

Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...

InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)

Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...

A wrapper class for inspecting calls to intrinsic functions.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

void markLoopAsDeleted(Loop &L, llvm::StringRef Name)

Loop passes should use this method to indicate they have deleted a loop from the nest.

bool contains(const LoopT *L) const

Return true if the specified loop is contained within in this loop.

bool isInnermost() const

Return true if the loop does not contain any (natural) loops.

unsigned getNumBlocks() const

Get the number of blocks in this loop in constant time.

BlockT * getHeader() const

void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)

This method is used by other analyses to update loop information.

void reserveBlocks(unsigned size)

interface to do reserve() for Blocks

iterator_range< block_iterator > blocks() const

void addChildLoop(LoopT *NewChild)

Add the specified loop to be a child of this loop.

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

LoopT * getParentLoop() const

Return the parent loop if it exists or nullptr for top level loops.

bool isLoopExiting(const BlockT *BB) const

True if terminator in the block can branch to another block that is outside of the current loop.

LoopT * removeChildLoop(iterator I)

This removes the specified child from being a subloop of this loop.

Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...

void perform(const LoopInfo *LI)

Traverse the loop blocks and store the DFS result.

void verify(const DominatorTreeBase< BlockT, false > &DomTree) const

void addTopLevelLoop(LoopT *New)

This adds the specified loop to the collection of top-level loops.

LoopT * AllocateLoop(ArgsTy &&...Args)

LoopT * removeLoop(iterator I)

This removes the specified top-level loop from this loop info object.

void changeLoopFor(BlockT *BB, LoopT *L)

Change the top-level loop that contains BB to the specified loop.

unsigned getLoopDepth(const BlockT *BB) const

Return the loop nesting level of the specified block.

LoopT * getLoopFor(const BlockT *BB) const

Return the inner most loop that BB lives in.

void destroy(LoopT *L)

Destroy a loop that has been removed from the LoopInfo nest.

Represents a single loop in the control flow graph.

StringRef getName() const

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

static MDString * get(LLVMContext &Context, StringRef Str)

Represents a read-write access to memory, whether it is a must-alias, or a may-alias.

An analysis that produces MemorySSA for a function.

MemorySSA * getMemorySSA() const

Get handle on MemorySSA.

void removeEdge(BasicBlock *From, BasicBlock *To)

Update the MemoryPhi in To following an edge deletion between From and To.

void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)

Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...

void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)

Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...

void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)

Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.

void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)

From block was spliced into From and To.

MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)

Create a MemoryAccess in MemorySSA at a specified point in a block.

void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)

Apply CFG insert updates, analogous with the DT edge updates.

void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)

Apply CFG updates, analogous with the DT edge updates.

void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)

void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)

Update phi nodes in exit block successors following cloning.

Encapsulates MemorySSA, including all data associated with memory accesses.

void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const

Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...

MemoryUseOrDef * getMemoryAccess(const Instruction *I) const

Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.

const DefsList * getBlockDefs(const BasicBlock *BB) const

Return the list of MemoryDef's and MemoryPhi's for a given basic block.

A Module instance is used to store all the information related to an LLVM module.

An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...

static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...

static PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

An analysis pass based on the new PM to deliver ProfileSummaryInfo.

Analysis providing profile information.

bool hasProfileSummary() const

Returns true if profile summary is available.

bool isColdBlock(const BBType *BB, BFIT *BFI) const

Returns true if BasicBlock BB is considered cold.

The main scalar evolution driver.

void forgetLoop(const Loop *L)

This method should be called by the client when it has changed a loop in a way that may effect Scalar...

void forgetTopmostLoop(const Loop *L)

void forgetBlockAndLoopDispositions(Value *V=nullptr)

Called when the client has changed the disposition of values in a loop or block.

void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)

Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...

This class represents the LLVM 'select' instruction.

size_type size() const

Determine the number of elements in the SetVector.

size_type count(const key_type &key) const

Count the number of elements of a given key in the SetVector.

iterator begin()

Get an iterator to the beginning of the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

bool erase(PtrType Ptr)

Remove pointer from the set.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void reserve(size_type N)

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

iterator insert(iterator I, T &&Elt)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...

void setSuccessorWeight(unsigned idx, CaseWeightOpt W)

Instruction::InstListType::iterator eraseFromParent()

Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...

void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)

Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...

CaseWeightOpt getSuccessorWeight(unsigned idx)

std::optional< uint32_t > CaseWeightOpt

SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)

Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.

unsigned getSuccessorIndex() const

Returns successor index for current case successor.

BasicBlockT * getCaseSuccessor() const

Resolves successor for current case.

ConstantIntT * getCaseValue() const

Resolves case value for current case.

BasicBlock * getDefaultDest() const

static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)

void setDefaultDest(BasicBlock *DefaultCase)

iterator_range< CaseIt > cases()

Iteration adapter for range-for loops.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

bool hasBranchDivergence(const Function *F=nullptr) const

Return true if branch divergence exists.

TargetCostKind

The kind of cost model.

@ TCK_CodeSize

Instruction code size.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const

Estimate the cost of a given IR user when lowered.

TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...

void push_back(EltTy NewVal)

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

unsigned getIntegerBitWidth() const

bool isIntegerTy() const

True if this is an instance of IntegerType.

A Use represents the edge between a Value definition and its users.

ValueT lookup(const KeyT &Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

size_type count(const KeyT &Val) const

Return 1 if the specified key is in the map, 0 otherwise.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

void setName(const Twine &Name)

Change the name of the value.

LLVMContext & getContext() const

All values hold a context through their type.

iterator_range< use_iterator > uses()

StringRef getName() const

Return a constant reference to the value's name.

An efficient, type-erasing, non-owning reference to a callable.

const ParentTy * getParent() const

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)

This version supports overloaded intrinsics.

bool match(Val *V, const Pattern &P)

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

auto m_LogicalOr()

Matches L || R where L and R are arbitrary values.

brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

auto m_LogicalAnd()

Matches L && R where L and R are arbitrary values.

class_match< BasicBlock > m_BasicBlock()

Match an arbitrary basic block value and ignore it.

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)

Combine two pattern matchers matching L || R.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void stable_sort(R &&Range)

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

auto successors(const MachineBasicBlock *BB)

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)

Find string metadata for a loop.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

bool isGuard(const User *U)

Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....

auto reverse(ContainerTy &&C)

detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)

zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.

void sort(IteratorTy Start, IteratorTy End)

@ RF_IgnoreMissingLocals

If this flag is set, the remapper ignores missing function-local entries (Argument,...

@ RF_NoModuleLevelChanges

If this flag is set, the remapper knows that only local values within a function (such as an instruct...

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool VerifyLoopInfo

Enable verification of loop info.

void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)

Convert the instruction operands from referencing the current values into those specified by VM.

BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)

Return a copy of the specified basic block, but without embedding the block into a particular functio...

bool VerifyMemorySSA

Enables verification of MemorySSA.

bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

Ensure that all exit blocks of the loop are dedicated exits.

bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Return true if this function can prove that V does not have undef bits and is never poison.

ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy

bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)

Extract branch weights from MD_prof metadata.

auto count_if(R &&Range, UnaryPredicate P)

Wrapper function around std::count_if to count the number of times an element satisfying a given pred...

BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)

Create a new LoopID after the loop has been transformed.

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

auto predecessors(const MachineBasicBlock *BB)

bool pred_empty(const BasicBlock *BB)

Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)

Split the containing block at the specified instruction - everything before SplitBefore stays in the ...

BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")

Split the edge connecting the specified blocks, and return the newly created basic block between From...

std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)

Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...

bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)

Put loop into LCSSA form.

void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)

Remap the Values used in the DbgRecords Range using the value map VM.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

A special type used by analysis passes to provide an address that identifies that particular analysis...

static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)

Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).

Description of the encoding of one expression Op.

Struct to hold information about a partially invariant condition.

SmallVector< Instruction * > InstToDuplicate

Instructions that need to be duplicated and checked for the unswitching condition.

Constant * KnownValue

Constant to indicate for which value the condition is invariant.

Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...

TargetTransformInfo & TTI

Direction

An enum for the direction of the loop.

A CRTP mix-in to automatically provide informational APIs needed for passes.