LLVM: lib/Transforms/Scalar/LoopInterchange.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

48#include

49#include

50#include

51

52using namespace llvm;

53

54#define DEBUG_TYPE "loop-interchange"

55

56STATISTIC(LoopsInterchanged, "Number of loops interchanged");

57

60 cl::desc("Interchange if you gain more than this number"));

61

62

66 "Maximum number of load-store instructions that should be handled "

67 "in the dependency matrix. Higher value may lead to more interchanges "

68 "at the cost of compile-time"));

69

70namespace {

71

73

74

75

76

77

78

79

80

81

82using CharMatrix = std::vector<std::vector>;

83

84

85enum class RuleTy {

86 PerLoopCacheAnalysis,

87 PerInstrOrderCost,

88 ForVectorization,

90};

91

92}

93

94

97 cl::desc("Minimum depth of loop nest considered for the transform"));

98

99

102 cl::desc("Maximum depth of loop nest considered for the transform"));

103

104

108 cl::desc("List of profitability heuristics to be used. They are applied in "

109 "the given order"),

111 RuleTy::PerInstrOrderCost,

112 RuleTy::ForVectorization}),

114 "Prioritize loop cache cost"),

115 clEnumValN(RuleTy::PerInstrOrderCost, "instorder",

116 "Prioritize the IVs order of each instruction"),

117 clEnumValN(RuleTy::ForVectorization, "vectorize",

118 "Prioritize vectorization"),

120 "Ignore profitability, force interchange (does not "

121 "work with other options)")));

122

123#ifndef NDEBUG

126 for (RuleTy Rule : Rules) {

127 if (!Set.insert(Rule).second)

128 return false;

129 if (Rule == RuleTy::Ignore)

130 return false;

131 }

132 return true;

133}

134

136 for (auto &Row : DepMatrix) {

137

138

142 }

143}

144

145

146

147

149 assert(Src->getParent() == Dst->getParent() && Src != Dst &&

150 "Expected Src and Dst to be different instructions in the same BB");

151

152 bool FoundSrc = false;

153 for (const Instruction &I : *(Src->getParent())) {

154 if (&I == Src) {

155 FoundSrc = true;

156 continue;

157 }

158 if (&I == Dst)

159 return FoundSrc;

160 }

161

163}

164#endif

165

171

173

174

176

179 return false;

181 if (!Ld->isSimple())

182 return false;

185 if (!St->isSimple())

186 return false;

188 }

189 }

190 }

191

193 << " Loads and Stores to analyze\n");

195 LLVM_DEBUG(dbgs() << "The transform doesn't support more than "

197 ORE->emit([&]() {

199 L->getStartLoc(), L->getHeader())

200 << "Number of loads/stores exceeded, the supported maximum "

201 "can be increased with option "

202 "-loop-interchange-maxmeminstr-count.";

203 });

204 return false;

205 }

207

208

209

211

212 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {

213 for (J = I, JE = MemInstr.end(); J != JE; ++J) {

214 std::vector Dep;

217

219 continue;

220

221 if (auto D = DI->depends(Src, Dst)) {

222 assert(D->isOrdered() && "Expected an output, flow or anti dep.");

223

224

225 if (D->normalize(SE))

226 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");

228 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";

229 dbgs() << "Found " << DepType

230 << " dependency between Src and Dst\n"

231 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');

232 unsigned Levels = D->getLevels();

234 for (unsigned II = 1; II <= Levels; ++II) {

235

236

237

238

239

240

241 unsigned Dir = D->getDirection(II);

248 else

251 }

252

253

254

255 if (D->isConfused()) {

256 assert(Dep.empty() && "Expected empty dependency vector");

257 Dep.assign(Level, '*');

258 }

259

260 while (Dep.size() != Level) {

261 Dep.push_back('I');

262 }

263

264

265

266 if (all_of(Dep, [](char C) { return C == '*'; })) {

267 ORE->emit([&]() {

269 L->getStartLoc(), L->getHeader())

270 << "All loops have dependencies in all directions.";

271 });

272 return false;

273 }

274

275

276 bool IsKnownForward = true;

277 if (Src->getParent() != Dst->getParent()) {

278

279

280

281 IsKnownForward = false;

282 } else {

283

284

285

287 "Unexpected instructions");

288

289

290

291

292 bool IsReversed = D->getSrc() != Src;

293 if (IsReversed)

294 IsKnownForward = false;

295 }

296

297

298

299 Dep.push_back('<');

300

301

302

303

304

306 StringRef(Dep.data(), Dep.size() - 1), DepMatrix.size());

307

308

309 if (Inserted)

310 DepMatrix.push_back(Dep);

311

312

313

314

315

316 if (!IsKnownForward)

317 DepMatrix[Ite->second].back() = '*';

318 }

319 }

320 }

321

322 return true;

323}

324

325

326

328 unsigned ToIndx) {

329 for (auto &Row : DepMatrix)

330 std::swap(Row[ToIndx], Row[FromIndx]);

331}

332

333

334

335

336

337

338static std::optional

340 for (unsigned char Direction : DV.slice(Begin, End - Begin)) {

342 return true;

344 return false;

345 }

346 return std::nullopt;

347}

348

349

351 unsigned InnerLoopId,

352 unsigned OuterLoopId) {

353 unsigned NumRows = DepMatrix.size();

354 std::vector Cur;

355

356 for (unsigned Row = 0; Row < NumRows; ++Row) {

357

358

359 Cur = DepMatrix[Row];

360

361

362

363

365 continue;

366

367

368

369

371 return false;

372 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);

374 return false;

375 }

376 return true;

377}

378

380 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "

381 << L.getHeader()->getParent()->getName() << " Loop: %"

382 << L.getHeader()->getName() << '\n');

383 assert(LoopList.empty() && "LoopList should initially be empty!");

384 Loop *CurrentLoop = &L;

385 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();

386 while (!Vec->empty()) {

387

388

389

390 if (Vec->size() != 1) {

391 LoopList = {};

392 return;

393 }

394

396 CurrentLoop = Vec->front();

398 }

400}

401

404 unsigned LoopNestDepth = LoopList.size();

405 if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {

406 LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth

409 Loop *OuterLoop = LoopList.front();

410 ORE.emit([&]() {

414 << "Unsupported depth of loop nest, the supported range is ["

417 });

418 return false;

419 }

420 return true;

421}

422

425 for (Loop *L : LoopList) {

428 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");

429 return false;

430 }

431 if (L->getNumBackEdges() != 1) {

432 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");

433 return false;

434 }

435 if (!L->getExitingBlock()) {

436 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");

437 return false;

438 }

439 }

440 return true;

441}

442

443namespace {

444

445

446class LoopInterchangeLegality {

447public:

448 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,

449 OptimizationRemarkEmitter *ORE)

450 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}

451

452

453 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,

454 CharMatrix &DepMatrix);

455

456

457

458 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);

459

460

461

462 bool isLoopStructureUnderstood();

463

464 bool currentLimitations();

465

466 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {

467 return OuterInnerReductions;

468 }

469

471 return InnerLoopInductions;

472 }

473

475 return HasNoWrapReductions;

476 }

477

478private:

479 bool tightlyNested(Loop *Outer, Loop *Inner);

480 bool containsUnsafeInstructions(BasicBlock *BB);

481

482

483

484

485

486 bool findInductionAndReductions(Loop *L,

488 Loop *InnerLoop);

489

490 Loop *OuterLoop;

491 Loop *InnerLoop;

492

493 ScalarEvolution *SE;

494

495

496 OptimizationRemarkEmitter *ORE;

497

498

499

500 SmallPtrSet<PHINode *, 4> OuterInnerReductions;

501

502

504

505

506

507

508 SmallVector<Instruction *, 4> HasNoWrapReductions;

509};

510

511

512

513

514class CacheCostManager {

515 Loop *OutermostLoop;

516 LoopStandardAnalysisResults *AR;

517 DependenceInfo *DI;

518

519

520

521 std::optional<std::unique_ptr> CC;

522

523

524

525 DenseMap<const Loop *, unsigned> CostMap;

526

527 void computeIfUnitinialized();

528

529public:

530 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,

531 DependenceInfo *DI)

532 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}

533 CacheCost *getCacheCost();

534 const DenseMap<const Loop *, unsigned> &getCostMap();

535};

536

537

538

539class LoopInterchangeProfitability {

540public:

541 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,

542 OptimizationRemarkEmitter *ORE)

543 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}

544

545

546 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,

547 unsigned InnerLoopId, unsigned OuterLoopId,

548 CharMatrix &DepMatrix, CacheCostManager &CCM);

549

550private:

551 int getInstrOrderCost();

552 std::optional isProfitablePerLoopCacheAnalysis(

553 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);

554 std::optional isProfitablePerInstrOrderCost();

555 std::optional isProfitableForVectorization(unsigned InnerLoopId,

556 unsigned OuterLoopId,

557 CharMatrix &DepMatrix);

558 Loop *OuterLoop;

559 Loop *InnerLoop;

560

561

562 ScalarEvolution *SE;

563

564

565 OptimizationRemarkEmitter *ORE;

566};

567

568

569class LoopInterchangeTransform {

570public:

571 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,

572 LoopInfo *LI, DominatorTree *DT,

573 const LoopInterchangeLegality &LIL)

574 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}

575

576

578 void restructureLoops(Loop *NewInner, Loop *NewOuter,

579 BasicBlock *OrigInnerPreHeader,

580 BasicBlock *OrigOuterPreHeader);

581 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);

582

583private:

584 bool adjustLoopLinks();

585 bool adjustLoopBranches();

586

587 Loop *OuterLoop;

588 Loop *InnerLoop;

589

590

591 ScalarEvolution *SE;

592

593 LoopInfo *LI;

594 DominatorTree *DT;

595

596 const LoopInterchangeLegality &LIL;

597};

598

599struct LoopInterchange {

600 ScalarEvolution *SE = nullptr;

601 LoopInfo *LI = nullptr;

602 DependenceInfo *DI = nullptr;

603 DominatorTree *DT = nullptr;

604 LoopStandardAnalysisResults *AR = nullptr;

605

606

607 OptimizationRemarkEmitter *ORE;

608

609 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,

610 DominatorTree *DT, LoopStandardAnalysisResults *AR,

611 OptimizationRemarkEmitter *ORE)

612 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}

613

614 bool run(Loop *L) {

615 if (L->getParentLoop())

616 return false;

617 SmallVector<Loop *, 8> LoopList;

619 return processLoopList(LoopList);

620 }

621

622 bool run(LoopNest &LN) {

623 SmallVector<Loop *, 8> LoopList(LN.getLoops());

624 for (unsigned I = 1; I < LoopList.size(); ++I)

625 if (LoopList[I]->getParentLoop() != LoopList[I - 1])

626 return false;

627 return processLoopList(LoopList);

628 }

629

630 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {

631

632

633 return LoopList.size() - 1;

634 }

635

636 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {

638

639

641 "Unsupported depth of loop nest.");

642

643 unsigned LoopNestDepth = LoopList.size();

644

645 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth

646 << "\n");

647

648 CharMatrix DependencyMatrix;

649 Loop *OuterMostLoop = *(LoopList.begin());

651 OuterMostLoop, DI, SE, ORE)) {

652 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");

653 return false;

654 }

655

656 LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";

658

659

661 if (!LoopNestExit) {

662 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");

663 return false;

664 }

665

666 unsigned SelecLoopId = selectLoopForInterchange(LoopList);

667 CacheCostManager CCM(LoopList[0], AR, DI);

668

669

670

671

672 for (unsigned j = SelecLoopId; j > 0; j--) {

673 bool ChangedPerIter = false;

674 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {

675 bool Interchanged =

676 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);

677 ChangedPerIter |= Interchanged;

679 }

680

681

682 if (!ChangedPerIter)

683 break;

684 }

686 }

687

688 bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,

689 unsigned OuterLoopId,

690 std::vector<std::vector> &DependencyMatrix,

691 CacheCostManager &CCM) {

692 Loop *OuterLoop = LoopList[OuterLoopId];

693 Loop *InnerLoop = LoopList[InnerLoopId];

694 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId

695 << " and OuterLoopId = " << OuterLoopId << "\n");

696 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);

697 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {

698 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");

699 return false;

700 }

701 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");

702 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);

703 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,

704 DependencyMatrix, CCM)) {

705 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");

706 return false;

707 }

708

709 ORE->emit([&]() {

710 return OptimizationRemark(DEBUG_TYPE, "Interchanged",

713 << "Loop interchanged with enclosing loop.";

714 });

715

716 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);

717 LIT.transform(LIL.getHasNoWrapReductions());

719 LoopsInterchanged++;

720

722

723

724 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);

725

727

728 LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";

730

731 return true;

732 }

733};

734

735}

736

737bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {

738 return any_of(*BB, [](const Instruction &I) {

739 return I.mayHaveSideEffects() || I.mayReadFromMemory();

740 });

741}

742

743bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {

747

748 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");

749

750

751

752

753 BranchInst *OuterLoopHeaderBI =

755 if (!OuterLoopHeaderBI)

756 return false;

757

758 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))

759 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&

760 Succ != OuterLoopLatch)

761 return false;

762

763 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");

764

765

766 if (containsUnsafeInstructions(OuterLoopHeader) ||

767 containsUnsafeInstructions(OuterLoopLatch))

768 return false;

769

770

771

772

773 if (InnerLoopPreHeader != OuterLoopHeader &&

774 containsUnsafeInstructions(InnerLoopPreHeader))

775 return false;

776

778

779

782 if (&SuccInner != OuterLoopLatch) {

783 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit

784 << " does not lead to the outer loop latch.\n";);

785 return false;

786 }

787

788

789

790 if (containsUnsafeInstructions(InnerLoopExit))

791 return false;

792

793 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");

794

795 return true;

796}

797

798bool LoopInterchangeLegality::isLoopStructureUnderstood() {

800 for (PHINode *InnerInduction : InnerLoopInductions) {

801 unsigned Num = InnerInduction->getNumOperands();

802 for (unsigned i = 0; i < Num; ++i) {

803 Value *Val = InnerInduction->getOperand(i);

805 continue;

807 if (I)

808 return false;

809

810

811

813 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==

814 InnerLoopPreheader &&

816 return false;

817 }

818 }

819 }

820

821

822

823

824

825

826

828 BranchInst *InnerLoopLatchBI =

831 return false;

832 if (CmpInst *InnerLoopCmp =

834 Value *Op0 = InnerLoopCmp->getOperand(0);

835 Value *Op1 = InnerLoopCmp->getOperand(1);

836

837

838

841

842

843

844

845

846 std::function<bool(Value *)> IsPathToInnerIndVar;

847 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {

849 return true;

851 return true;

853 if (I)

854 return false;

856 return IsPathToInnerIndVar(I->getOperand(0));

858 return IsPathToInnerIndVar(I->getOperand(0)) &&

859 IsPathToInnerIndVar(I->getOperand(1));

860 return false;

861 };

862

863

864

865 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))

866 return true;

867

868

869

870 if (IsPathToInnerIndVar(Op0) && isa<Constant>(Op0)) {

873 } else if (IsPathToInnerIndVar(Op1) && isa<Constant>(Op1)) {

876 }

877

878 if (Left == nullptr)

879 return false;

880

883 return false;

884 }

885

886 return true;

887}

888

889

890

893 if (PHI)

894 return SV;

895

896 if (PHI->getNumIncomingValues() != 1)

897 return SV;

899}

900

901

902static PHINode *

905

907 return nullptr;

908

911 if (PHI->getNumIncomingValues() == 1)

912 continue;

915

917 return nullptr;

918

920 switch (RK) {

938 return PHI;

939

940

941

942

943

944

945

946

947

948

949

950

951

952

957

958

959 if (Ops.empty())

960 return nullptr;

961

963 assert(I->getOpcode() == OpCode &&

964 "Expected the instruction to be the reduction operation");

965 (void)OpCode;

966

967

968

969 if (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())

971 }

972 return PHI;

973 }

974

975 default:

976 return nullptr;

977 }

978 }

979 return nullptr;

980 }

981 }

982

983 return nullptr;

984}

985

986bool LoopInterchangeLegality::findInductionAndReductions(

988 if (L->getLoopLatch() || L->getLoopPredecessor())

989 return false;

990 for (PHINode &PHI : L->getHeader()->phis()) {

991 InductionDescriptor ID;

994 else {

995

996

997 if (!InnerLoop) {

998 if (!OuterInnerReductions.count(&PHI)) {

999 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "

1000 "across the outer loop.\n");

1001 return false;

1002 }

1003 } else {

1004 assert(PHI.getNumIncomingValues() == 2 &&

1005 "Phis in loop header should have exactly 2 incoming values");

1006

1007

1009 PHINode *InnerRedPhi =

1011 if (!InnerRedPhi ||

1015 << "Failed to recognize PHI as an induction or reduction.\n");

1016 return false;

1017 }

1018 OuterInnerReductions.insert(&PHI);

1019 OuterInnerReductions.insert(InnerRedPhi);

1020 }

1021 }

1022 }

1023 return true;

1024}

1025

1026

1027

1028bool LoopInterchangeLegality::currentLimitations() {

1030

1031

1032

1038 dbgs() << "Loops where the latch is not the exiting block are not"

1039 << " supported currently.\n");

1040 ORE->emit([&]() {

1041 return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",

1044 << "Loops where the latch is not the exiting block cannot be"

1045 " interchange currently.";

1046 });

1047 return true;

1048 }

1049

1051 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {

1053 dbgs() << "Only outer loops with induction or reduction PHI nodes "

1054 << "are supported currently.\n");

1055 ORE->emit([&]() {

1056 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",

1059 << "Only outer loops with induction or reduction PHI nodes can be"

1060 " interchanged currently.";

1061 });

1062 return true;

1063 }

1064

1065 Inductions.clear();

1066

1067

1068

1069 Loop *CurLevelLoop = OuterLoop;

1070 while (!CurLevelLoop->getSubLoops().empty()) {

1071

1072 CurLevelLoop = CurLevelLoop->getSubLoops().front();

1073 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {

1075 dbgs() << "Only inner loops with induction or reduction PHI nodes "

1076 << "are supported currently.\n");

1077 ORE->emit([&]() {

1078 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",

1081 << "Only inner loops with induction or reduction PHI nodes can be"

1082 " interchange currently.";

1083 });

1084 return true;

1085 }

1086 }

1087

1088

1089 if (!isLoopStructureUnderstood()) {

1090 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");

1091 ORE->emit([&]() {

1092 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",

1095 << "Inner loop structure not understood currently.";

1096 });

1097 return true;

1098 }

1099

1100 return false;

1101}

1102

1103bool LoopInterchangeLegality::findInductions(

1104 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {

1105 for (PHINode &PHI : L->getHeader()->phis()) {

1106 InductionDescriptor ID;

1109 }

1110 return !Inductions.empty();

1111}

1112

1113

1114

1115

1116static bool

1121

1122 if (PHI.getNumIncomingValues() > 1)

1123 return false;

1125 PHINode *PN = dyn_cast(U);

1126 return !PN ||

1127 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));

1128 })) {

1129 return false;

1130 }

1131 }

1132 return true;

1133}

1134

1135

1136

1137

1138

1139

1140

1141

1148 continue;

1149

1150

1151

1152

1153

1154

1155

1156

1157

1158

1159

1161 return false;

1162 }

1163 }

1164 return true;

1165}

1166

1167

1168

1169

1170

1171

1172

1173

1174

1177 return true;

1178

1179

1180

1182 return true;

1183

1184

1185

1186

1187

1188

1189

1192 for (auto *U : PHI.users()) {

1194 if (InnerLoopLatch == UI->getParent())

1195 return false;

1196 }

1197 }

1198 return true;

1199}

1200

1201bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,

1202 unsigned OuterLoopId,

1203 CharMatrix &DepMatrix) {

1205 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId

1206 << " and OuterLoopId = " << OuterLoopId

1207 << " due to dependence\n");

1208 ORE->emit([&]() {

1209 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",

1212 << "Cannot interchange loops due to dependences.";

1213 });

1214 return false;

1215 }

1216

1217 for (auto *BB : OuterLoop->blocks())

1220

1221 if (CI->onlyWritesMemory())

1222 continue;

1224 dbgs() << "Loops with call instructions cannot be interchanged "

1225 << "safely.");

1226 ORE->emit([&]() {

1227 return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",

1228 CI->getDebugLoc(),

1229 CI->getParent())

1230 << "Cannot interchange loops due to call instruction.";

1231 });

1232

1233 return false;

1234 }

1235

1236 if (!findInductions(InnerLoop, InnerLoopInductions)) {

1237 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");

1238 return false;

1239 }

1240

1242 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");

1243 ORE->emit([&]() {

1244 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",

1247 << "Cannot interchange loops because unsupported PHI nodes found "

1248 "in inner loop latch.";

1249 });

1250 return false;

1251 }

1252

1253

1254

1255 if (currentLimitations()) {

1256 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");

1257 return false;

1258 }

1259

1260

1261 if (!tightlyNested(OuterLoop, InnerLoop)) {

1263 ORE->emit([&]() {

1264 return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",

1267 << "Cannot interchange loops because they are not tightly "

1268 "nested.";

1269 });

1270 return false;

1271 }

1272

1274 OuterInnerReductions)) {

1275 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");

1276 ORE->emit([&]() {

1277 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",

1280 << "Found unsupported PHI node in loop exit.";

1281 });

1282 return false;

1283 }

1284

1286 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");

1287 ORE->emit([&]() {

1288 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",

1291 << "Found unsupported PHI node in loop exit.";

1292 });

1293 return false;

1294 }

1295

1296 return true;

1297}

1298

1299void CacheCostManager::computeIfUnitinialized() {

1300 if (CC.has_value())

1301 return;

1302

1305

1306

1307

1308

1309

1310

1311

1312

1313 if (*CC != nullptr)

1314 for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts()))

1315 CostMap[Cost.first] = Idx;

1316}

1317

1318CacheCost *CacheCostManager::getCacheCost() {

1319 computeIfUnitinialized();

1320 return CC->get();

1321}

1322

1323const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {

1324 computeIfUnitinialized();

1325 return CostMap;

1326}

1327

1328int LoopInterchangeProfitability::getInstrOrderCost() {

1329 unsigned GoodOrder, BadOrder;

1330 BadOrder = GoodOrder = 0;

1331 for (BasicBlock *BB : InnerLoop->blocks()) {

1332 for (Instruction &Ins : *BB) {

1334 bool FoundInnerInduction = false;

1335 bool FoundOuterInduction = false;

1337

1339 continue;

1340

1341 const SCEV *OperandVal = SE->getSCEV(Op);

1343 if (!AR)

1344 continue;

1345

1346

1347

1348

1349

1350

1351 if (AR->getLoop() == InnerLoop) {

1352

1353

1354 FoundInnerInduction = true;

1355 if (FoundOuterInduction) {

1356 GoodOrder++;

1357 break;

1358 }

1359 }

1360

1361

1362

1363

1364

1365 if (AR->getLoop() == OuterLoop) {

1366

1367

1368 FoundOuterInduction = true;

1369 if (FoundInnerInduction) {

1370 BadOrder++;

1371 break;

1372 }

1373 }

1374 }

1375 }

1376 }

1377 }

1378 return GoodOrder - BadOrder;

1379}

1380

1381std::optional

1382LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(

1383 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {

1384

1385

1386

1387 auto InnerLoopIt = CostMap.find(InnerLoop);

1388 if (InnerLoopIt == CostMap.end())

1389 return std::nullopt;

1390 auto OuterLoopIt = CostMap.find(OuterLoop);

1391 if (OuterLoopIt == CostMap.end())

1392 return std::nullopt;

1393

1395 return std::nullopt;

1396 unsigned InnerIndex = InnerLoopIt->second;

1397 unsigned OuterIndex = OuterLoopIt->second;

1399 << ", OuterIndex = " << OuterIndex << "\n");

1400 assert(InnerIndex != OuterIndex && "CostMap should assign unique "

1401 "numbers to each loop");

1402 return std::optional(InnerIndex < OuterIndex);

1403}

1404

1405std::optional

1406LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {

1407

1408

1409

1410 int Cost = getInstrOrderCost();

1413 return std::optional(true);

1414

1415 return std::nullopt;

1416}

1417

1418

1419static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {

1420 for (const auto &Dep : DepMatrix) {

1421 char Dir = Dep[LoopId];

1422 char DepType = Dep.back();

1423 assert((DepType == '<' || DepType == '*') &&

1424 "Unexpected element in dependency vector");

1425

1426

1427 if (Dir == '=' || Dir == 'I')

1428 continue;

1429

1430

1431

1432

1433 if (Dir == '<' && DepType == '<')

1434 continue;

1435

1436

1437 return false;

1438 }

1439 return true;

1440}

1441

1442std::optional LoopInterchangeProfitability::isProfitableForVectorization(

1443 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {

1444

1445

1447 return false;

1448

1449

1450

1452 return true;

1453

1454

1455

1456

1457

1458

1459 return std::nullopt;

1460}

1461

1462bool LoopInterchangeProfitability::isProfitable(

1463 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,

1464 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {

1465

1466

1467

1468

1469

1470

1473 if (InnerBTC && InnerBTC->isZero()) {

1474 LLVM_DEBUG(dbgs() << "Inner loop back-edge isn't taken, rejecting "

1475 "single iteration loop\n");

1476 return false;

1477 }

1478 if (OuterBTC && OuterBTC->isZero()) {

1479 LLVM_DEBUG(dbgs() << "Outer loop back-edge isn't taken, rejecting "

1480 "single iteration loop\n");

1481 return false;

1482 }

1483

1484

1486 return true;

1488 "Duplicate rules and option 'ignore' are not allowed");

1489

1490

1491

1492

1493

1494

1495

1496

1497

1498 std::optional shouldInterchange;

1500 switch (RT) {

1501 case RuleTy::PerLoopCacheAnalysis: {

1502 CacheCost *CC = CCM.getCacheCost();

1503 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();

1504 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);

1505 break;

1506 }

1507 case RuleTy::PerInstrOrderCost:

1508 shouldInterchange = isProfitablePerInstrOrderCost();

1509 break;

1510 case RuleTy::ForVectorization:

1511 shouldInterchange =

1512 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);

1513 break;

1514 case RuleTy::Ignore:

1515 llvm_unreachable("Option 'ignore' is not supported with other options");

1516 break;

1517 }

1518

1519

1520

1521 if (shouldInterchange.has_value())

1522 break;

1523 }

1524

1525 if (!shouldInterchange.has_value()) {

1526 ORE->emit([&]() {

1527 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",

1530 << "Insufficient information to calculate the cost of loop for "

1531 "interchange.";

1532 });

1533 return false;

1534 } else if (!shouldInterchange.value()) {

1535 ORE->emit([&]() {

1536 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",

1539 << "Interchanging loops is not considered to improve cache "

1540 "locality nor vectorization.";

1541 });

1542 return false;

1543 }

1544 return true;

1545}

1546

1547void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,

1548 Loop *InnerLoop) {

1549 for (Loop *L : *OuterLoop)

1550 if (L == InnerLoop) {

1551 OuterLoop->removeChildLoop(L);

1552 return;

1553 }

1555}

1556

1557

1558

1559

1560

1561

1562

1563

1564

1565

1566

1567

1568

1569

1570

1571

1572

1573

1574

1575

1576

1577

1578

1579

1580void LoopInterchangeTransform::restructureLoops(

1581 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,

1582 BasicBlock *OrigOuterPreHeader) {

1583 Loop *OuterLoopParent = OuterLoop->getParentLoop();

1584

1585

1587 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);

1588

1589

1590 if (OuterLoopParent) {

1591

1592 removeChildLoop(OuterLoopParent, NewInner);

1593 removeChildLoop(NewInner, NewOuter);

1595 } else {

1596 removeChildLoop(NewInner, NewOuter);

1598 }

1602

1603

1604 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());

1605

1606

1607

1608 for (BasicBlock *BB : NewInner->blocks())

1611

1612

1613

1616 for (BasicBlock *BB : OrigInnerBBs) {

1617

1619 continue;

1620

1621 if (BB == OuterHeader || BB == OuterLatch)

1623 else

1625 }

1626

1627

1628

1630 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);

1631

1632

1634}

1635

1636bool LoopInterchangeTransform::transform(

1638 bool Transformed = false;

1639

1642 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");

1643 auto &InductionPHIs = LIL.getInnerLoopInductions();

1644 if (InductionPHIs.empty()) {

1645 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");

1646 return false;

1647 }

1648

1649 SmallVector<Instruction *, 8> InnerIndexVarList;

1650 for (PHINode *CurInductionPHI : InductionPHIs) {

1651 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)

1654 else

1657 }

1658

1659

1660

1661

1662

1666

1667 SmallSetVector<Instruction *, 4> WorkList;

1668 unsigned i = 0;

1669 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {

1670 for (; i < WorkList.size(); i++) {

1671

1672

1673 Instruction *NewI = WorkList[i]->clone();

1676 "Moving instructions with side-effects may change behavior of "

1677 "the loop nest!");

1681 UserI->getParent() == NewLatch ||

1683 U.set(NewI);

1684 }

1685

1686

1687 for (Value *Op : WorkList[i]->operands()) {

1689 if (!OpI ||

1692 continue;

1693 WorkList.insert(OpI);

1694 }

1695 }

1696 };

1697

1698

1701 ->getCondition());

1702 if (CondI)

1703 WorkList.insert(CondI);

1704 MoveInstructions();

1705 for (Instruction *InnerIndexVar : InnerIndexVarList)

1707 MoveInstructions();

1708 }

1709

1710

1715 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");

1716 }

1717

1718

1719

1720

1721

1722

1724 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();

1725 if (InnerLoopPreHeader != OuterLoopHeader) {

1726 for (Instruction &I :

1728 std::prev(InnerLoopPreHeader->end()))))

1730 }

1731

1732 Transformed |= adjustLoopLinks();

1733 if (!Transformed) {

1735 return false;

1736 }

1737

1738

1739

1740 for (Instruction *Reduction : DropNoWrapInsts) {

1741 Reduction->setHasNoSignedWrap(false);

1742 Reduction->setHasNoUnsignedWrap(false);

1743 }

1744

1745 return true;

1746}

1747

1748

1749

1756

1757

1759

1760

1764 I->removeFromParent();

1765

1766

1768

1769

1772}

1773

1774

1775

1776

1779 std::vectorDominatorTree::UpdateType &DTUpdates,

1780 bool MustUpdateOnce = true) {

1782 "BI must jump to OldBB exactly once.");

1785 if (Op == OldBB) {

1786 Op.set(NewBB);

1788 }

1789

1791 DTUpdates.push_back(

1792 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});

1793 DTUpdates.push_back(

1794 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});

1795 }

1796 assert(Changed && "Expected a successor to be updated");

1797}

1798

1799

1804

1805

1806

1807

1808

1809

1810

1811

1813 assert(P.getNumIncomingValues() == 1 &&

1814 "Only loops with a single exit are supported!");

1815

1816

1817 auto IncI = cast(P.getIncomingValueForBlock(InnerLatch));

1818

1819

1821

1822

1823 if (IncIInnerMost->getParent() != InnerLatch &&

1824 IncIInnerMost->getParent() != InnerHeader)

1825 continue;

1826

1828 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {

1829 return (cast(U)->getParent() == OuterHeader &&

1830 IncI->getParent() == InnerHeader) ||

1831 cast(U)->getParent() == OuterExit;

1832 }) &&

1833 "Can only replace phis iff the uses are in the loop nest exit or "

1834 "the incoming value is defined in the inner header (it will "

1835 "dominate all loop blocks after interchanging)");

1836 P.replaceAllUsesWith(IncI);

1837 P.eraseFromParent();

1838 }

1839

1842

1845

1846

1847

1848

1849

1850

1851 for (PHINode *P : LcssaInnerExit)

1853

1854

1855

1856 for (PHINode *P : LcssaInnerLatch)

1858

1859

1860

1861

1862

1863 if (OuterExit) {

1865 if (P.getNumIncomingValues() != 1)

1866 continue;

1867

1868

1870 if (I || LI->getLoopFor(I->getParent()) == InnerLoop)

1871 continue;

1872

1876

1877

1878 for (auto *Pred : predecessors(InnerLatch)) {

1879 if (Pred == OuterLatch)

1880 continue;

1881 NewPhi->addIncoming(P.getIncomingValue(0), Pred);

1882 }

1884 P.setIncomingValue(0, NewPhi);

1885 }

1886 }

1887

1888

1889

1890

1892}

1893

1894

1895

1896

1897

1898

1899

1900

1901

1902

1903

1904

1905

1906

1907

1908

1909

1910

1911

1912

1913

1914

1915

1916

1917

1918

1922

1923

1924 if (OuterLoopLatch == InnerLoopExit)

1925 return;

1926

1927

1930 for (PHINode *Phi : Phis) {

1931 assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");

1932 LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi

1933 << "\n");

1934 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));

1935 Phi->eraseFromParent();

1936 }

1937}

1938

1939bool LoopInterchangeTransform::adjustLoopBranches() {

1940 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");

1941 std::vectorDominatorTree::UpdateType DTUpdates;

1942

1943 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();

1945

1946 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&

1947 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&

1948 InnerLoopPreHeader && "Guaranteed by loop-simplify form");

1949

1951

1952

1953

1954

1955

1958 OuterLoopPreHeader =

1960 if (InnerLoopPreHeader == OuterLoop->getHeader())

1961 InnerLoopPreHeader =

1963

1964

1966 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();

1968 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();

1970 BasicBlock *InnerLoopLatchPredecessor =

1972 BasicBlock *InnerLoopLatchSuccessor;

1973 BasicBlock *OuterLoopLatchSuccessor;

1974

1975 BranchInst *OuterLoopLatchBI =

1977 BranchInst *InnerLoopLatchBI =

1979 BranchInst *OuterLoopHeaderBI =

1981 BranchInst *InnerLoopHeaderBI =

1983

1984 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||

1985 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||

1986 !InnerLoopHeaderBI)

1987 return false;

1988

1989 BranchInst *InnerLoopLatchPredecessorBI =

1991 BranchInst *OuterLoopPredecessorBI =

1993

1994 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)

1995 return false;

1997 if (!InnerLoopHeaderSuccessor)

1998 return false;

1999

2000

2001

2002

2003

2004 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,

2005 InnerLoopPreHeader, DTUpdates, false);

2006

2007

2009

2010 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,

2011 DTUpdates,

2012 false);

2013 }

2015 InnerLoopHeaderSuccessor, DTUpdates,

2016 false);

2017

2018

2020 OuterLoopHeader);

2021

2022 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,

2023 OuterLoopPreHeader, DTUpdates);

2024

2025

2026 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)

2027 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);

2028 else

2029 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);

2030

2031 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,

2032 InnerLoopLatchSuccessor, DTUpdates);

2033

2034 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)

2035 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);

2036 else

2037 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);

2038

2039 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,

2040 OuterLoopLatchSuccessor, DTUpdates);

2041 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,

2042 DTUpdates);

2043

2045 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,

2046 OuterLoopPreHeader);

2047

2048 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,

2049 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),

2050 InnerLoop, LI);

2051

2052

2053 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);

2054

2055 auto &OuterInnerReductions = LIL.getOuterInnerReductions();

2056

2058 for (PHINode &PHI : InnerLoopHeader->phis())

2059 if (OuterInnerReductions.contains(&PHI))

2061

2062 for (PHINode &PHI : OuterLoopHeader->phis())

2063 if (OuterInnerReductions.contains(&PHI))

2065

2066

2067

2068

2069 for (PHINode *PHI : OuterLoopPHIs) {

2070 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););

2072 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");

2073 }

2074 for (PHINode *PHI : InnerLoopPHIs) {

2075 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););

2077 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");

2078 }

2079

2080

2081 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);

2082 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);

2083 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);

2084 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);

2085

2086

2087

2088

2089

2090 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;

2091 for (Instruction &I :

2092 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))

2095

2096 return true;

2097}

2098

2099bool LoopInterchangeTransform::adjustLoopLinks() {

2100

2101 bool Changed = adjustLoopBranches();

2103

2104

2105

2106 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();

2108 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);

2109 }

2111}

2112

2119

2121 LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");

2123 }

2125

2126

2129

2131 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");

2133 }

2134

2135 ORE.emit([&]() {

2139 << "Computed dependence info, invoking the transform.";

2140 });

2141

2143 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))

2145 U.markLoopNestChanged(true);

2147}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

This file defines the StringMap class.

ReachingDefInfo InstSet InstSet & Ignore

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)

Move the contents of SourceBB to before the last instruction of TargetBB.

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

This file defines the interface for the loop cache analysis.

SmallVector< Loop *, 4 > LoopVector

Loop::LoopBounds::Direction Direction

static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))

static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))

static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)

Definition LoopInterchange.cpp:903

static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))

static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)

Definition LoopInterchange.cpp:423

static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)

Definition LoopInterchange.cpp:1142

static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)

This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...

Definition LoopInterchange.cpp:1919

static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)

Definition LoopInterchange.cpp:327

static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)

Definition LoopInterchange.cpp:1800

static void printDepMatrix(CharMatrix &DepMatrix)

Definition LoopInterchange.cpp:135

static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)

Swap instructions between BB1 and BB2 but keep terminators intact.

Definition LoopInterchange.cpp:1758

static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))

static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)

Definition LoopInterchange.cpp:402

static bool inThisOrder(const Instruction *Src, const Instruction *Dst)

Return true if Src appears before Dst in the same basic block.

Definition LoopInterchange.cpp:148

static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)

Definition LoopInterchange.cpp:1175

static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)

Return true if we can vectorize the loop specified by LoopId.

Definition LoopInterchange.cpp:1419

static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)

Definition LoopInterchange.cpp:350

static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)

Definition LoopInterchange.cpp:1117

#define DEBUG_TYPE

Definition LoopInterchange.cpp:54

static Value * followLCSSA(Value *SV)

Definition LoopInterchange.cpp:891

static void populateWorklist(Loop &L, LoopVector &LoopList)

Definition LoopInterchange.cpp:379

static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)

Definition LoopInterchange.cpp:166

static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)

Definition LoopInterchange.cpp:1777

static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)

Definition LoopInterchange.cpp:339

static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))

static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)

Definition LoopInterchange.cpp:124

This file defines the interface for the loop nest analysis.

This header provides classes for managing a pipeline of passes over loops in LLVM IR.

loop Loop Strength Reduction

uint64_t IntrinsicInst * II

SmallVector< Value *, 8 > ValueVector

This file defines the SmallSet class.

This file defines the SmallVector class.

static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

const T & front() const

front - Get the first element.

size_t size() const

size - Get the array size.

ArrayRef< T > slice(size_t N, size_t M) const

slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

iterator_range< const_phi_iterator > phis() const

Returns a range that iterates over the phis in the basic block.

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const

Return a const iterator range over the instructions in the block, skipping any debug instructions.

LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const

Returns an iterator to the first instruction in this block that is not a PHINode instruction.

LLVM_ABI const BasicBlock * getUniqueSuccessor() const

Return the successor of this block if it has a unique successor.

LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)

Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.

LLVM_ABI const BasicBlock * getUniquePredecessor() const

Return the predecessor of this block if it has a unique predecessor block.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)

Transfer all instructions from FromBB to this basic block at ToIt.

Conditional or Unconditional Branch instruction.

iterator_range< succ_op_iterator > successors()

bool isConditional() const

BasicBlock * getSuccessor(unsigned i) const

Value * getCondition() const

static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)

Create a CacheCost for the loop nest rooted by Root.

CacheCostTy getLoopCost(const Loop &L) const

Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...

iterator find(const_arg_type_t< KeyT > Val)

DependenceInfo - This class is the main dependence-analysis driver.

LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)

depends - Tests for a dependence between the Src and Dst instructions.

void applyUpdates(ArrayRef< UpdateType > Updates)

Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...

static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)

Returns true if Phi is an induction in the loop L.

LLVM_ABI void insertBefore(InstListType::iterator InsertPos)

Insert an unlinked instruction into a basic block immediately before the specified position.

LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY

Return true if the instruction may have side effects.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

bool contains(const LoopT *L) const

Return true if the specified loop is contained within in this loop.

BlockT * getLoopLatch() const

If there is a single latch block for this loop, return it.

bool isInnermost() const

Return true if the loop does not contain any (natural) loops.

void removeBlockFromLoop(BlockT *BB)

This removes the specified basic block from the current loop, updating the Blocks as appropriate.

const std::vector< LoopT * > & getSubLoops() const

Return the loops contained entirely within this loop.

BlockT * getHeader() const

iterator_range< block_iterator > blocks() const

void addChildLoop(LoopT *NewChild)

Add the specified loop to be a child of this loop.

void addBlockEntry(BlockT *BB)

This adds a basic block directly to the basic block list.

BlockT * getExitBlock() const

If getExitBlocks would return exactly one block, return that block.

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

BlockT * getExitingBlock() const

If getExitingBlocks would return exactly one block, return that block.

BlockT * getUniqueExitBlock() const

If getUniqueExitBlocks would return exactly one block, return that block.

LoopT * removeChildLoop(iterator I)

This removes the specified child from being a subloop of this loop.

void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)

Replace the specified loop in the top-level loops list with the indicated loop.

void changeLoopFor(BlockT *BB, LoopT *L)

Change the top-level loop that contains BB to the specified loop.

LoopT * getLoopFor(const BlockT *BB) const

Return the inner most loop that BB lives in.

This class represents a loop nest and can be used to query its properties.

static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)

Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).

ArrayRef< Loop * > getLoops() const

Get the loops in the nest.

Function * getParent() const

Return the function to which the loop-nest belongs.

Loop & getOutermostLoop() const

Return the outermost loop in the loop nest.

Represents a single loop in the control flow graph.

DebugLoc getStartLoc() const

Return the debug location of the start of this loop.

bool isLoopInvariant(const Value *V) const

Return true if the specified value is loop invariant.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

op_range incoming_values()

void setIncomingBlock(unsigned i, BasicBlock *BB)

void setIncomingValue(unsigned i, Value *V)

static unsigned getIncomingValueNumForOperand(unsigned i)

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

The RecurrenceDescriptor is used to identify recurrences variables in a loop.

Instruction * getExactFPMathInst() const

Returns 1st non-reassociative FP instruction in the PHI node's use-chain.

unsigned getOpcode() const

static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)

Returns true if Phi is a reduction in TheLoop.

LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const

Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...

RecurKind getRecurrenceKind() const

const Loop * getLoop() const

This class represents an analyzed expression in the program.

LLVM_ABI bool isZero() const

Return true if the expression is a constant zero.

The main scalar evolution driver.

LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)

If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI void forgetLoop(const Loop *L)

This method should be called by the client when it has changed a loop in a way that may effect Scalar...

LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

LLVM_ABI bool isSCEVable(Type *Ty) const

Test if values of the given type are analyzable within the SCEV framework.

size_type size() const

Determine the number of elements in the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...

std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)

Emplace a new element for the specified key into the map if the key isn't already in the map.

StringRef - Represent a constant reference to a string, i.e.

constexpr size_t size() const

size - Get the string size.

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

iterator_range< user_iterator > users()

const ParentTy * getParent() const

self_iterator getIterator()

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

list_initializer< Ty > list_init(ArrayRef< Ty > Vals)

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

friend class Instruction

Iterator for Instructions in a `BasicBlock.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

auto successors(const MachineBasicBlock *BB)

LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)

Put a loop nest into LCSSA form.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

auto map_range(ContainerTy &&C, FuncTy F)

AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager

The loop analysis manager.

OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)

Wrapper function around std::transform to apply a function to a range and store the result elsewhere.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

auto drop_end(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the last N elements excluded.

RecurKind

These are the kinds of recurrences that we support.

@ UMin

Unsigned integer min implemented in terms of select(cmp()).

@ FMinimumNum

FP min with llvm.minimumnum semantics.

@ Or

Bitwise or logical OR of integers.

@ FMinimum

FP min with llvm.minimum semantics.

@ Mul

Product of integers.

@ AnyOf

AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...

@ Xor

Bitwise or logical XOR of integers.

@ FMax

FP max implemented in terms of select(cmp()).

@ FMaximum

FP max with llvm.maximum semantics.

@ FMulAdd

Sum of float products with llvm.fmuladd(a * b + sum).

@ SMax

Signed integer max implemented in terms of select(cmp()).

@ And

Bitwise or logical AND of integers.

@ SMin

Signed integer min implemented in terms of select(cmp()).

@ FMin

FP min implemented in terms of select(cmp()).

@ FMaximumNum

FP max with llvm.maximumnum semantics.

@ UMax

Unsigned integer max implemented in terms of select(cmp()).

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

DWARFExpression::Operation Op

ArrayRef(const T &OneElt) -> ArrayRef< T >

LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)

Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

auto predecessors(const MachineBasicBlock *BB)

iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...

PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

Definition LoopInterchange.cpp:2113

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...