LLVM: lib/Transforms/Scalar/LoopFuse.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

67

68using namespace llvm;

69

70#define DEBUG_TYPE "loop-fusion"

71

73STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");

74STATISTIC(InvalidPreheader, "Loop has invalid preheader");

75STATISTIC(InvalidHeader, "Loop has invalid header");

76STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");

77STATISTIC(InvalidExitBlock, "Loop has invalid exit block");

78STATISTIC(InvalidLatch, "Loop has invalid latch");

80STATISTIC(AddressTakenBB, "Basic block has address taken");

81STATISTIC(MayThrowException, "Loop may throw an exception");

82STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");

83STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");

84STATISTIC(InvalidDependencies, "Dependencies prevent fusion");

85STATISTIC(UnknownTripCount, "Loop has unknown trip count");

86STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");

87STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");

88STATISTIC(NonAdjacent, "Loops are not adjacent");

90 NonEmptyPreheader,

91 "Loop has a non-empty preheader with instructions that cannot be moved");

92STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");

93STATISTIC(NonIdenticalGuards, "Candidates have different guards");

94STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "

95 "instructions that cannot be moved");

96STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "

97 "instructions that cannot be moved");

98STATISTIC(NotRotated, "Candidate is not rotated");

100 "The second candidate is guarded while the first one is not");

101STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");

102STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");

104

110

112 "loop-fusion-dependence-analysis",

113 cl::desc("Which dependence analysis should loop fusion use?"),

115 "Use the scalar evolution interface"),

117 "Use the dependence analysis interface"),

119 "Use all available analyses")),

121

124 cl::desc("Max number of iterations to be peeled from a loop, such that "

125 "fusion can take place"));

126

127#ifndef NDEBUG

130 cl::desc("Enable verbose debugging for Loop Fusion"),

132#endif

133

134namespace {

135

136

137

138

139

140

141

142

143

144

145struct FusionCandidate {

146

147

148

149

150

152

154

156

158

160

162

164

166

167 bool Valid;

168

170

172

173 bool AbleToPeel;

174

175 bool Peeled;

176

177

178

179

180

183

185

188 : Preheader(L->getLoopPreheader()), Header(L->getHeader()),

189 ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),

190 Latch(L->getLoopLatch()), L(L), Valid(true),

191 GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),

192 Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {

193

194

195

196

197

199 if (BB->hasAddressTaken()) {

200 invalidate();

201 reportInvalidCandidate(AddressTakenBB);

202 return;

203 }

204

206 if (I.mayThrow()) {

207 invalidate();

209 return;

210 }

212 if (SI->isVolatile()) {

213 invalidate();

215 return;

216 }

217 }

219 if (LI->isVolatile()) {

220 invalidate();

222 return;

223 }

224 }

225 if (I.mayWriteToMemory())

226 MemWrites.push_back(&I);

227 if (I.mayReadFromMemory())

228 MemReads.push_back(&I);

229 }

230 }

231 }

232

233

235 return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&

236 L->isInvalid() && Valid;

237 }

238

239

240 void verify() const {

242 assert(L->isInvalid() && "Loop is invalid!");

243 assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");

244 assert(Header == L->getHeader() && "Header is out of sync");

245 assert(ExitingBlock == L->getExitingBlock() &&

246 "Exiting Blocks is out of sync");

247 assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");

248 assert(Latch == L->getLoopLatch() && "Latch is out of sync");

249 }

250

251

252

253

254

255

257 if (GuardBranch)

259 else

260 return Preheader;

261 }

262

263

264

265 void updateAfterPeeling() {

266 Preheader = L->getLoopPreheader();

267 Header = L->getHeader();

268 ExitingBlock = L->getExitingBlock();

269 ExitBlock = L->getExitBlock();

270 Latch = L->getLoopLatch();

272 }

273

274

275

276

277

278

279

280

281 BasicBlock *getNonLoopBlock() const {

282 assert(GuardBranch && "Only valid on guarded loops.");

284 "Expecting guard to be a conditional branch.");

285 if (Peeled)

287 return (GuardBranch->getSuccessor(0) == Preheader)

290 }

291

292#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

294 dbgs() << "\tGuardBranch: ";

295 if (GuardBranch)

296 dbgs() << *GuardBranch;

297 else

298 dbgs() << "nullptr";

299 dbgs() << "\n"

300 << (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"

301 << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")

302 << "\n"

303 << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"

304 << "\tExitingBB: "

305 << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"

306 << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")

307 << "\n"

308 << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"

309 << "\tEntryBlock: "

310 << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr")

311 << "\n";

312 }

313#endif

314

315

316

317

320 LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");

321 if (!Preheader)

322 ++InvalidPreheader;

323 if (!Header)

324 ++InvalidHeader;

325 if (!ExitingBlock)

326 ++InvalidExitingBlock;

327 if (!ExitBlock)

328 ++InvalidExitBlock;

329 if (!Latch)

330 ++InvalidLatch;

331 if (L->isInvalid())

332 ++InvalidLoop;

333

334 return false;

335 }

336

337

340 << " trip count not computable!\n");

342 }

343

344 if (L->isLoopSimplifyForm()) {

346 << " is not in simplified form!\n");

348 }

349

350 if (L->isRotatedForm()) {

351 LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");

353 }

354

355 return true;

356 }

357

358private:

359

360

361

362

363

364

365 void invalidate() {

366 MemWrites.clear();

367 MemReads.clear();

369 }

370

372 using namespace ore;

373 assert(L && Preheader && "Fusion candidate not initialized properly!");

374#if LLVM_ENABLE_STATS

375 ++Stat;

377 L->getStartLoc(), Preheader)

379 << "Loop is not a candidate for fusion: " << Stat.getDesc());

380#endif

381 return false;

382 }

383};

384

385struct FusionCandidateCompare {

386

387

388

389

390

391

392

393

394

395

396 bool operator()(const FusionCandidate &LHS,

397 const FusionCandidate &RHS) const {

398 const DominatorTree *DT = &(LHS.DT);

399

402

403

404

405 assert(DT && LHS.PDT && "Expecting valid dominator tree");

406

407

408 if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {

409

410

411 assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));

412 return false;

413 }

414

415 if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {

416

417 assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));

418 return true;

419 }

420

421

422

423

424

425 bool WrongOrder =

427 bool RightOrder =

429 if (WrongOrder && RightOrder) {

430

431

432

433 DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);

434 DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);

436 } else if (WrongOrder)

437 return false;

438 else if (RightOrder)

439 return true;

440

441

442

443

445 "No dominance relationship between these fusion candidates!");

446 }

447};

448}

449

451

452

453

454

455

456

457

458

459

460

461

464

465#ifndef NDEBUG

467 dbgs() << "****************************\n";

468 for (const Loop *L : LV)

470 dbgs() << "****************************\n";

471}

472

474 if (FC.isValid())

475 OS << FC.Preheader->getName();

476 else

477 OS << "";

478

479 return OS;

480}

481

484 for (const FusionCandidate &FC : CandSet)

485 OS << FC << '\n';

486

487 return OS;

488}

489

490static void

492 dbgs() << "Fusion Candidates: \n";

493 for (const auto &CandidateSet : FusionCandidates) {

494 dbgs() << "*** Fusion Candidate Set ***\n";

495 dbgs() << CandidateSet;

496 dbgs() << "****************************\n";

497 }

498}

499#endif

500

501namespace {

502

503

504

505

506

507

508

509struct LoopDepthTree {

510 using LoopsOnLevelTy = SmallVector<LoopVector, 4>;

513

514 LoopDepthTree(LoopInfo &LI) : Depth(1) {

517 }

518

519

520

521 bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }

522

523

524

525 void removeLoop(const Loop *L) { RemovedLoops.insert(L); }

526

527

528 void descend() {

529 LoopsOnLevelTy LoopsOnNextLevel;

530

532 for (Loop *L : LV)

533 if (!isRemovedLoop(L) && L->begin() != L->end())

534 LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));

535

536 LoopsOnLevel = LoopsOnNextLevel;

537 RemovedLoops.clear();

538 Depth++;

539 }

540

541 bool empty() const { return size() == 0; }

542 size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }

543 unsigned getDepth() const { return Depth; }

544

545 iterator begin() { return LoopsOnLevel.begin(); }

546 iterator end() { return LoopsOnLevel.end(); }

547 const_iterator begin() const { return LoopsOnLevel.begin(); }

548 const_iterator end() const { return LoopsOnLevel.end(); }

549

550private:

551

552

553 SmallPtrSet<const Loop *, 8> RemovedLoops;

554

555

556 unsigned Depth;

557

558

559 LoopsOnLevelTy LoopsOnLevel;

560};

561

562struct LoopFuser {

563private:

564

566

567 LoopDepthTree LDT;

568 DomTreeUpdater DTU;

569

570 LoopInfo &LI;

571 DominatorTree &DT;

572 DependenceInfo &DI;

573 ScalarEvolution &SE;

574 PostDominatorTree &PDT;

575 OptimizationRemarkEmitter &ORE;

576 AssumptionCache &AC;

577 const TargetTransformInfo &TTI;

578

579public:

580 LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,

581 ScalarEvolution &SE, PostDominatorTree &PDT,

582 OptimizationRemarkEmitter &ORE, const DataLayout &DL,

583 AssumptionCache &AC, const TargetTransformInfo &TTI)

584 : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),

585 DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}

586

587

588

589

590 bool fuseLoops(Function &F) {

591#ifndef NDEBUG

593 LI.print(dbgs());

594 }

595#endif

596

597 LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()

598 << "\n");

600

601 while (!LDT.empty()) {

602 LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "

603 << LDT.getDepth() << "\n";);

604

606 assert(LV.size() > 0 && "Empty loop set was build!");

607

608

609

610 if (LV.size() == 1)

611 continue;

612#ifndef NDEBUG

615 dbgs() << " Visit loop set (#" << LV.size() << "):\n";

617 });

618 }

619#endif

620

621 collectFusionCandidates(LV);

622 Changed |= fuseCandidates();

623 }

624

625

626

627

628

629

630

632 LDT.descend();

633 FusionCandidates.clear();

634 }

635

637 LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););

638

639#ifndef NDEBUG

641 assert(PDT.verify());

642 LI.verify(DT);

643 SE.verify();

644#endif

645

648 }

649

650private:

651

652

653

654

655

656

658 const FusionCandidate &FC1) const {

659 assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");

660

661 return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),

662 DT, PDT);

663 }

664

665

666

667

668 void collectFusionCandidates(const LoopVector &LV) {

669 for (Loop *L : LV) {

672 FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);

673 if (!CurrCand.isEligibleForFusion(SE))

674 continue;

675

676

677

678

679

680

681 bool FoundSet = false;

682

683 for (auto &CurrCandSet : FusionCandidates) {

685 CurrCandSet.insert(CurrCand);

686 FoundSet = true;

687#ifndef NDEBUG

690 << " to existing candidate set\n");

691#endif

692 break;

693 }

694 }

695 if (!FoundSet) {

696

697#ifndef NDEBUG

699 LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");

700#endif

702 NewCandSet.insert(CurrCand);

703 FusionCandidates.push_back(NewCandSet);

704 }

705 NumFusionCandidates++;

706 }

707 }

708

709

710

711

712

713

714 bool isBeneficialFusion(const FusionCandidate &FC0,

715 const FusionCandidate &FC1) {

716 return true;

717 }

718

719

720

721

722

723

724

725

726

727 std::pair<bool, std::optional>

728 haveIdenticalTripCounts(const FusionCandidate &FC0,

729 const FusionCandidate &FC1) const {

730 const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);

732 UncomputableTripCount++;

733 LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");

734 return {false, std::nullopt};

735 }

736

737 const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);

739 UncomputableTripCount++;

740 LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");

741 return {false, std::nullopt};

742 }

743

744 LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "

745 << *TripCount1 << " are "

746 << (TripCount0 == TripCount1 ? "identical" : "different")

747 << "\n");

748

749 if (TripCount0 == TripCount1)

750 return {true, 0};

751

752 LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "

753 "determining the difference between trip counts\n");

754

755

756

757 const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);

758 const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);

759

760

761

762 if (TC0 == 0 || TC1 == 0) {

763 LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "

764 "have a constant number of iterations. Peeling "

765 "is not benefical\n");

766 return {false, std::nullopt};

767 }

768

769 std::optional Difference;

770 int Diff = TC0 - TC1;

771

772 if (Diff > 0)

773 Difference = Diff;

774 else {

776 dbgs() << "Difference is less than 0. FC1 (second loop) has more "

777 "iterations than the first one. Currently not supported\n");

778 }

779

780 LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference

781 << "\n");

782

783 return {false, Difference};

784 }

785

786 void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,

787 unsigned PeelCount) {

788 assert(FC0.AbleToPeel && "Should be able to peel loop");

789

790 LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount

791 << " iterations of the first loop. \n");

792

794 FC0.Peeled =

795 peelLoop(FC0.L, PeelCount, false, &LI, &SE, DT, &AC, true, VMap);

796 if (FC0.Peeled) {

798

799#ifndef NDEBUG

800 auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);

801

802 assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&

803 "Loops should have identical trip counts after peeling");

804#endif

805

807

808

809 PDT.recalculate(*FC0.Preheader->getParent());

810

811 FC0.updateAfterPeeling();

812

813

814

815

816

817

818

819

820

822 FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;

823 if (BB) {

825 SmallVector<Instruction *, 8> WorkList;

827 if (Pred != FC0.ExitBlock) {

828 WorkList.emplace_back(Pred->getTerminator());

830 DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));

831 }

832 }

833

834

835 for (Instruction *CurrentBranch : WorkList) {

836 BasicBlock *Succ = CurrentBranch->getSuccessor(0);

837 if (Succ == BB)

838 Succ = CurrentBranch->getSuccessor(1);

840 }

841

842 DTU.applyUpdates(TreeUpdates);

843 DTU.flush();

844 }

846 dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount

847 << " iterations from the first loop.\n"

848 "Both Loops have the same number of iterations now.\n");

849 }

850 }

851

852

853

854

855

856

857

858

859 bool fuseCandidates() {

860 bool Fused = false;

862 for (auto &CandidateSet : FusionCandidates) {

863 if (CandidateSet.size() < 2)

864 continue;

865

866 LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"

867 << CandidateSet << "\n");

868

869 for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {

870 assert(!LDT.isRemovedLoop(FC0->L) &&

871 "Should not have removed loops in CandidateSet!");

872 auto FC1 = FC0;

873 for (++FC1; FC1 != CandidateSet.end(); ++FC1) {

874 assert(!LDT.isRemovedLoop(FC1->L) &&

875 "Should not have removed loops in CandidateSet!");

876

877 LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();

878 dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");

879

880 FC0->verify();

881 FC1->verify();

882

883

884

885

886

887

888 std::pair<bool, std::optional> IdenticalTripCountRes =

889 haveIdenticalTripCounts(*FC0, *FC1);

890 bool SameTripCount = IdenticalTripCountRes.first;

891 std::optional TCDifference = IdenticalTripCountRes.second;

892

893

894

895 if (FC0->AbleToPeel && !SameTripCount && TCDifference) {

898 << "Difference in loop trip counts: " << *TCDifference

899 << " is greater than maximum peel count specificed: "

901 } else {

902

903

904 SameTripCount = true;

905 }

906 }

907

908 if (!SameTripCount) {

909 LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "

910 "counts. Not fusing.\n");

911 reportLoopFusion(*FC0, *FC1,

912 NonEqualTripCount);

913 continue;

914 }

915

916 if (!isAdjacent(*FC0, *FC1)) {

918 << "Fusion candidates are not adjacent. Not fusing.\n");

919 reportLoopFusion(*FC0, *FC1, NonAdjacent);

920 continue;

921 }

922

923 if ((!FC0->GuardBranch && FC1->GuardBranch) ||

924 (FC0->GuardBranch && !FC1->GuardBranch)) {

925 LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "

926 "another one is not. Not fusing.\n");

927 reportLoopFusion(

928 *FC0, *FC1, OnlySecondCandidateIsGuarded);

929 continue;

930 }

931

932

933

934 if (FC0->GuardBranch && FC1->GuardBranch &&

935 !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {

936 LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "

937 "guards. Not Fusing.\n");

938 reportLoopFusion(*FC0, *FC1,

939 NonIdenticalGuards);

940 continue;

941 }

942

943 if (FC0->GuardBranch) {

944 assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");

945

948 &PDT, &DI)) {

949 LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "

950 "instructions in exit block. Not fusing.\n");

951 reportLoopFusion(*FC0, *FC1,

952 NonEmptyExitBlock);

953 continue;

954 }

955

958 *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,

959 &DI)) {

961 << "Fusion candidate contains unsafe "

962 "instructions in guard block. Not fusing.\n");

963 reportLoopFusion(*FC0, *FC1,

964 NonEmptyGuardBlock);

965 continue;

966 }

967 }

968

969

970

971 if (!dependencesAllowFusion(*FC0, *FC1)) {

972 LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");

973 reportLoopFusion(*FC0, *FC1,

974 InvalidDependencies);

975 continue;

976 }

977

978

979

980

981 SmallVector<Instruction *, 4> SafeToHoist;

982 SmallVector<Instruction *, 4> SafeToSink;

983

984

985

986 if (!isEmptyPreheader(*FC1)) {

987 LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "

988 "preheader.\n");

989

990

991

992 if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,

993 SafeToSink)) {

994 LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "

995 "Fusion Candidate Pre-header.\n"

996 << "Not Fusing.\n");

997 reportLoopFusion(*FC0, *FC1,

998 NonEmptyPreheader);

999 continue;

1000 }

1001 }

1002

1003 bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);

1005 << "\tFusion appears to be "

1006 << (BeneficialToFuse ? "" : "un") << "profitable!\n");

1007 if (!BeneficialToFuse) {

1008 reportLoopFusion(*FC0, *FC1,

1009 FusionNotBeneficial);

1010 continue;

1011 }

1012

1013

1014

1015

1016

1017 movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);

1018

1019 LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "

1020 << *FC1 << "\n");

1021

1022 FusionCandidate FC0Copy = *FC0;

1023

1024

1025 bool Peel = TCDifference && *TCDifference > 0;

1026 if (Peel)

1027 peelFusionCandidate(FC0Copy, *FC1, *TCDifference);

1028

1029

1030

1031

1032

1033 reportLoopFusion((Peel ? FC0Copy : *FC0), *FC1,

1034 FuseCounter);

1035

1036 FusionCandidate FusedCand(

1037 performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,

1038 FC0Copy.PP);

1039 FusedCand.verify();

1040 assert(FusedCand.isEligibleForFusion(SE) &&

1041 "Fused candidate should be eligible for fusion!");

1042

1043

1044 LDT.removeLoop(FC1->L);

1045

1046 CandidateSet.erase(FC0);

1047 CandidateSet.erase(FC1);

1048

1049 auto InsertPos = CandidateSet.insert(FusedCand);

1050

1051 assert(InsertPos.second &&

1052 "Unable to insert TargetCandidate in CandidateSet!");

1053

1054

1055

1056

1057 FC0 = FC1 = InsertPos.first;

1058

1059 LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet

1060 << "\n");

1061

1062 Fused = true;

1063 }

1064 }

1065 }

1066 return Fused;

1067 }

1068

1069

1070

1071

1072

1073

1074 bool canHoistInst(Instruction &I,

1075 const SmallVector<Instruction *, 4> &SafeToHoist,

1076 const SmallVector<Instruction *, 4> &NotHoisting,

1077 const FusionCandidate &FC0) const {

1079 assert(FC0PreheaderTarget &&

1080 "Expected single successor for loop preheader.");

1081

1082 for (Use &Op : I.operands()) {

1084 bool OpHoisted = is_contained(SafeToHoist, OpInst);

1085

1086

1087 if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {

1088 return false;

1089 }

1090 }

1091 }

1092

1093

1094

1096 return false;

1097

1098

1099 if (I.mayReadOrWriteMemory())

1100 return true;

1101

1102 LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n");

1103 for (Instruction *NotHoistedInst : NotHoisting) {

1104 if (auto D = DI.depends(&I, NotHoistedInst)) {

1105

1106

1107 if (D->isFlow() || D->isAnti() || D->isOutput()) {

1108 LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's "

1109 "preheader that is not being hoisted.\n");

1110 return false;

1111 }

1112 }

1113 }

1114

1115 for (Instruction *ReadInst : FC0.MemReads) {

1116 if (auto D = DI.depends(ReadInst, &I)) {

1117

1118 if (D->isAnti()) {

1119 LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n");

1120 return false;

1121 }

1122 }

1123 }

1124

1125 for (Instruction *WriteInst : FC0.MemWrites) {

1126 if (auto D = DI.depends(WriteInst, &I)) {

1127

1128 if (D->isFlow() || D->isOutput()) {

1129 LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n");

1130 return false;

1131 }

1132 }

1133 }

1134 return true;

1135 }

1136

1137

1138

1139

1140 bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const {

1141 for (User *U : I.users()) {

1143

1144

1146

1147

1148 return false;

1149 }

1150 }

1151 }

1152

1153

1154 if (I.mayReadOrWriteMemory())

1155 return true;

1156

1157 for (Instruction *ReadInst : FC1.MemReads) {

1158 if (auto D = DI.depends(&I, ReadInst)) {

1159

1160 if (D->isFlow()) {

1161 LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n");

1162 return false;

1163 }

1164 }

1165 }

1166

1167 for (Instruction *WriteInst : FC1.MemWrites) {

1168 if (auto D = DI.depends(&I, WriteInst)) {

1169

1170 if (D->isOutput() || D->isAnti()) {

1171 LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n");

1172 return false;

1173 }

1174 }

1175 }

1176

1177 return true;

1178 }

1179

1180

1181

1182

1183

1184

1186 const FusionCandidate &FC0,

1187 const FusionCandidate &FC1) const {

1188 for (Instruction *Inst : SafeToSink) {

1189

1191 if (!Phi)

1192 continue;

1193 for (unsigned I = 0; I < Phi->getNumIncomingValues(); I++) {

1194 if (Phi->getIncomingBlock(I) != FC0.Latch)

1195 continue;

1196 assert(FC1.Latch && "FC1 latch is not set");

1197 Phi->setIncomingBlock(I, FC1.Latch);

1198 }

1199 }

1200 }

1201

1202

1203

1204 bool collectMovablePreheaderInsts(

1205 const FusionCandidate &FC0, const FusionCandidate &FC1,

1206 SmallVector<Instruction *, 4> &SafeToHoist,

1207 SmallVector<Instruction *, 4> &SafeToSink) const {

1208 BasicBlock *FC1Preheader = FC1.Preheader;

1209

1210

1211 SmallVector<Instruction *, 4> NotHoisting;

1212

1213 for (Instruction &I : *FC1Preheader) {

1214

1215 if (&I == FC1Preheader->getTerminator())

1216 continue;

1217

1218

1219

1220

1221 if (I.mayThrow() || I.willReturn()) {

1222 LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n");

1223 return false;

1224 }

1225

1227

1228 if (I.isAtomic() || I.isVolatile()) {

1230 dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n");

1231 return false;

1232 }

1233

1234 if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) {

1237 } else {

1238 LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");

1240

1241 if (canSinkInst(I, FC1)) {

1244 } else {

1246 return false;

1247 }

1248 }

1249 }

1251 dbgs() << "All preheader instructions could be sunk or hoisted!\n");

1252 return true;

1253 }

1254

1255

1256 class AddRecLoopReplacer : public SCEVRewriteVisitor {

1257 public:

1258 AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,

1259 bool UseMax = true)

1260 : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),

1261 NewL(NewL) {}

1262

1263 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {

1264 const Loop *ExprL = Expr->getLoop();

1266 if (ExprL == &OldL) {

1269 }

1270

1271 if (OldL.contains(ExprL)) {

1273 if (!UseMax || !Pos || !Expr->isAffine()) {

1274 Valid = false;

1275 return Expr;

1276 }

1278 }

1279

1280 for (const SCEV *Op : Expr->operands())

1283 }

1284

1285 bool wasValidSCEV() const { return Valid; }

1286

1287 private:

1288 bool Valid, UseMax;

1289 const Loop &OldL, &NewL;

1290 };

1291

1292

1293

1294 bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,

1295 Instruction &I1, bool EqualIsInvalid) {

1298 if (!Ptr0 || !Ptr1)

1299 return false;

1300

1301 const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);

1302 const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);

1303#ifndef NDEBUG

1305 LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs "

1306 << *SCEVPtr1 << "\n");

1307#endif

1308 AddRecLoopReplacer Rewriter(SE, L0, L1);

1309 SCEVPtr0 = Rewriter.visit(SCEVPtr0);

1310#ifndef NDEBUG

1312 LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0

1313 << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");

1314#endif

1315 if (Rewriter.wasValidSCEV())

1316 return false;

1317

1318

1319

1320

1321

1323 auto HasNonLinearDominanceRelation = [&](const SCEV *S) {

1325 if (!AddRec)

1326 return false;

1327 return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&

1329 };

1330 if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))

1331 return false;

1332

1333 ICmpInst::Predicate Pred =

1334 EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;

1335 bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);

1336#ifndef NDEBUG

1339 << (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1

1340 << "\n");

1341#endif

1342 return IsAlwaysGE;

1343 }

1344

1345

1346

1347

1348 bool dependencesAllowFusion(const FusionCandidate &FC0,

1349 const FusionCandidate &FC1, Instruction &I0,

1350 Instruction &I1, bool AnyDep,

1352#ifndef NDEBUG

1354 LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "

1355 << DepChoice << "\n");

1356 }

1357#endif

1358 switch (DepChoice) {

1360 return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);

1362 auto DepResult = DI.depends(&I0, &I1);

1363 if (!DepResult)

1364 return true;

1365#ifndef NDEBUG

1368 dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "

1369 << (DepResult->isOrdered() ? "true" : "false")

1370 << "]\n");

1371 LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()

1372 << "\n");

1373 }

1374#endif

1375 unsigned Levels = DepResult->getLevels();

1376 unsigned SameSDLevels = DepResult->getSameSDLevels();

1377 unsigned CurLoopLevel = FC0.L->getLoopDepth();

1378

1379

1380 if (CurLoopLevel > Levels + SameSDLevels)

1381 return false;

1382

1383

1384 for (unsigned Level = 1; Level <= std::min(CurLoopLevel - 1, Levels);

1386 unsigned Direction = DepResult->getDirection(Level, false);

1387

1388

1389

1390

1392 LLVM_DEBUG(dbgs() << "Safe to fuse due to non-equal acceses in the "

1393 "outer loops\n");

1394 NumDA++;

1395 return true;

1396 }

1397 }

1398

1399 assert(CurLoopLevel > Levels && "Fusion candidates are not separated");

1400

1401 unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);

1402

1403

1404

1405

1406

1407

1408

1409

1411 LLVM_DEBUG(dbgs() << "Safe to fuse with no backward loop-carried "

1412 "dependency\n");

1413 NumDA++;

1414 return true;

1415 }

1416

1417 if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())

1419 dbgs() << "TODO: Implement pred/succ dependence handling!\n");

1420

1421

1422 return false;

1423 }

1424

1426 return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,

1428 dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,

1430 }

1431

1432 llvm_unreachable("Unknown fusion dependence analysis choice!");

1433 }

1434

1435

1436 bool dependencesAllowFusion(const FusionCandidate &FC0,

1437 const FusionCandidate &FC1) {

1438 LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1

1439 << "\n");

1441 assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()));

1442

1443 for (Instruction *WriteL0 : FC0.MemWrites) {

1444 for (Instruction *WriteL1 : FC1.MemWrites)

1445 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,

1446 false,

1448 InvalidDependencies++;

1449 return false;

1450 }

1451 for (Instruction *ReadL1 : FC1.MemReads)

1452 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,

1453 false,

1455 InvalidDependencies++;

1456 return false;

1457 }

1458 }

1459

1460 for (Instruction *WriteL1 : FC1.MemWrites) {

1461 for (Instruction *WriteL0 : FC0.MemWrites)

1462 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,

1463 false,

1465 InvalidDependencies++;

1466 return false;

1467 }

1468 for (Instruction *ReadL0 : FC0.MemReads)

1469 if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,

1470 false,

1472 InvalidDependencies++;

1473 return false;

1474 }

1475 }

1476

1477

1478

1479 for (BasicBlock *BB : FC1.L->blocks())

1480 for (Instruction &I : *BB)

1481 for (auto &Op : I.operands())

1483 if (FC0.L->contains(Def->getParent())) {

1484 InvalidDependencies++;

1485 return false;

1486 }

1487

1488 return true;

1489 }

1490

1491

1492

1493

1494

1495

1496

1497

1498

1499

1500 bool isAdjacent(const FusionCandidate &FC0,

1501 const FusionCandidate &FC1) const {

1502

1503 if (FC0.GuardBranch)

1504 return FC0.getNonLoopBlock() == FC1.getEntryBlock();

1505 else

1506 return FC0.ExitBlock == FC1.getEntryBlock();

1507 }

1508

1509 bool isEmptyPreheader(const FusionCandidate &FC) const {

1510 return FC.Preheader->size() == 1;

1511 }

1512

1513

1514

1515 void movePreheaderInsts(const FusionCandidate &FC0,

1516 const FusionCandidate &FC1,

1517 SmallVector<Instruction *, 4> &HoistInsts,

1518 SmallVector<Instruction *, 4> &SinkInsts) const {

1519

1520 assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&

1521 "Attempting to sink and hoist preheader instructions, but not all "

1522 "the preheader instructions are accounted for.");

1523

1524 NumHoistedInsts += HoistInsts.size();

1525 NumSunkInsts += SinkInsts.size();

1526

1528 if (!HoistInsts.empty())

1529 dbgs() << "Hoisting: \n";

1530 for (Instruction *I : HoistInsts)

1531 dbgs() << *I << "\n";

1532 if (!SinkInsts.empty())

1533 dbgs() << "Sinking: \n";

1534 for (Instruction *I : SinkInsts)

1535 dbgs() << *I << "\n";

1536 });

1537

1538 for (Instruction *I : HoistInsts) {

1539 assert(I->getParent() == FC1.Preheader);

1540 I->moveBefore(*FC0.Preheader,

1542 }

1543

1544 for (Instruction *I : reverse(SinkInsts)) {

1545 assert(I->getParent() == FC1.Preheader);

1547 }

1548

1549

1550 fixPHINodes(SinkInsts, FC0, FC1);

1551 }

1552

1553

1554

1555

1556

1557

1558

1559

1560

1561

1562

1563

1564

1565 bool haveIdenticalGuards(const FusionCandidate &FC0,

1566 const FusionCandidate &FC1) const {

1567 assert(FC0.GuardBranch && FC1.GuardBranch &&

1568 "Expecting FC0 and FC1 to be guarded loops.");

1569

1570 if (auto FC0CmpInst =

1572 if (auto FC1CmpInst =

1574 if (!FC0CmpInst->isIdenticalTo(FC1CmpInst))

1575 return false;

1576

1577

1578

1579

1580 if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader)

1581 return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader);

1582 else

1583 return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);

1584 }

1585

1586

1587

1588 void simplifyLatchBranch(const FusionCandidate &FC) const {

1590 if (FCLatchBranch) {

1593 "Expecting the two successors of FCLatchBranch to be the same");

1594 BranchInst *NewBranch =

1597 }

1598 }

1599

1600

1601

1602 void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {

1606 DTU.flush();

1607 }

1608 }

1609

1610

1611

1612

1613

1614

1615

1616

1617

1618

1619

1620

1621

1622

1623

1624

1625

1626

1627

1628

1629

1630

1631

1632

1633

1634

1635

1636

1637

1638

1639 Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {

1640 assert(FC0.isValid() && FC1.isValid() &&

1641 "Expecting valid fusion candidates");

1642

1643 LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();

1644 dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););

1645

1646

1647

1649

1650

1651

1652

1653 if (FC0.GuardBranch)

1654 return fuseGuardedLoops(FC0, FC1);

1655

1656 assert(FC1.Preheader ==

1657 (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));

1658 assert(FC1.Preheader->size() == 1 &&

1660

1661

1662

1663

1664

1665

1666

1667

1668

1669

1671 if (FC0.ExitingBlock != FC0.Latch)

1672 for (PHINode &PHI : FC0.Header->phis())

1674

1675

1678

1679

1681

1682

1683

1684

1685

1686

1687

1688

1689

1690

1691

1692

1693

1694

1695

1696

1697

1698

1699 if (!FC0.Peeled) {

1701 FC1.Header);

1702 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1703 DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));

1704 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1705 DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));

1706 } else {

1707 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1708 DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));

1709

1710

1712 FC1.Header);

1713 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1714 DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));

1716 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1717 DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));

1718 new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);

1719 }

1720

1721

1724 new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);

1725 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1726 DominatorTree::Delete, FC1.Preheader, FC1.Header));

1727

1728

1730 if (SE.isSCEVable(PHI->getType()))

1731 SE.forgetValue(PHI);

1732 if (PHI->hasNUsesOrMore(1))

1734 else

1735 PHI->eraseFromParent();

1736 }

1737

1738

1739

1740

1741

1743 for (PHINode *LCPHI : OriginalFC0PHIs) {

1744 int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);

1745 assert(L1LatchBBIdx >= 0 &&

1746 "Expected loop carried value to be rewired at this point!");

1747

1748 Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);

1749

1750 PHINode *L1HeaderPHI =

1753 L1HeaderPHI->addIncoming(LCV, FC0.Latch);

1755 FC0.ExitingBlock);

1756

1757 LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);

1758 }

1759

1760

1763

1764

1765

1766 simplifyLatchBranch(FC0);

1767

1768

1769

1770 if (FC0.Latch != FC0.ExitingBlock)

1771 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1772 DominatorTree::Insert, FC0.Latch, FC1.Header));

1773

1774 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,

1775 FC0.Latch, FC0.Header));

1776 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,

1777 FC1.Latch, FC0.Header));

1778 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,

1779 FC1.Latch, FC1.Header));

1780

1781

1782 DTU.applyUpdates(TreeUpdates);

1783

1784 LI.removeBlock(FC1.Preheader);

1785 DTU.deleteBB(FC1.Preheader);

1786 if (FC0.Peeled) {

1787 LI.removeBlock(FC0.ExitBlock);

1788 DTU.deleteBB(FC0.ExitBlock);

1789 }

1790

1791 DTU.flush();

1792

1793

1794

1795

1796

1797 SE.forgetLoop(FC1.L);

1798 SE.forgetLoop(FC0.L);

1799

1800

1801

1802 mergeLatch(FC0, FC1);

1803

1804

1805

1806

1807 SE.forgetBlockAndLoopDispositions();

1808

1809

1810 SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());

1811 for (BasicBlock *BB : Blocks) {

1814 if (LI.getLoopFor(BB) != FC1.L)

1815 continue;

1816 LI.changeLoopFor(BB, FC0.L);

1817 }

1819 const auto &ChildLoopIt = FC1.L->begin();

1820 Loop *ChildLoop = *ChildLoopIt;

1823 }

1824

1825

1826 LI.erase(FC1.L);

1827

1828#ifndef NDEBUG

1830 assert(DT.verify(DominatorTree::VerificationLevel::Fast));

1831 assert(PDT.verify());

1832 LI.verify(DT);

1833 SE.verify();

1834#endif

1835

1837

1838 return FC0.L;

1839 }

1840

1841

1842

1843

1844

1845

1846

1847

1848

1849

1850

1851

1852

1853 template

1854 void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,

1856 assert(FC0.Preheader && FC1.Preheader &&

1857 "Expecting valid fusion candidates");

1858 using namespace ore;

1859#if LLVM_ENABLE_STATS

1860 ++Stat;

1862 FC0.Preheader)

1864 << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))

1865 << " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))

1866 << ": " << Stat.getDesc());

1867#endif

1868 }

1869

1870

1871

1872

1873

1874

1875

1876

1877

1878

1879

1880

1881

1882

1883

1884

1885 Loop *fuseGuardedLoops(const FusionCandidate &FC0,

1886 const FusionCandidate &FC1) {

1887 assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops");

1888

1891 BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();

1892 BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();

1894

1895

1896

1897

1898

1900 (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,

1901 DT, PDT, DI);

1902

1903

1904

1906

1907 assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");

1908

1910

1911

1912

1913

1914

1915

1916

1917

1918

1920 FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);

1921

1922 BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;

1924

1925

1927 new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock);

1928

1929 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1930 DominatorTree::Delete, FC1GuardBlock, FC1.Preheader));

1931 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1932 DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));

1933 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1934 DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));

1935 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1936 DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));

1937

1938 if (FC0.Peeled) {

1939

1940 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1941 DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));

1943 new UnreachableInst(FC0ExitBlockSuccessor->getContext(),

1944 FC0ExitBlockSuccessor);

1945 }

1946

1948 "Expecting guard block to have no predecessors");

1950 "Expecting guard block to have no successors");

1951

1952

1953

1954

1955

1956

1957

1958

1959

1960

1961

1962

1963

1965 if (FC0.ExitingBlock != FC0.Latch)

1966 for (PHINode &PHI : FC0.Header->phis())

1968

1969 assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!");

1970

1971

1974

1975

1976

1977

1978

1979

1980

1981

1982

1983

1984

1985

1986

1987

1989 FC1.Header);

1990

1991 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1992 DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));

1993 TreeUpdates.emplace_back(DominatorTree::UpdateType(

1994 DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));

1995

1996

1997

1998

1999

2000

2001

2002

2003 assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");

2005 new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);

2006

2007

2008

2011 new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);

2012 TreeUpdates.emplace_back(DominatorTree::UpdateType(

2013 DominatorTree::Delete, FC1.Preheader, FC1.Header));

2014

2015

2017 if (SE.isSCEVable(PHI->getType()))

2018 SE.forgetValue(PHI);

2019 if (PHI->hasNUsesOrMore(1))

2021 else

2022 PHI->eraseFromParent();

2023 }

2024

2025

2026

2027

2028

2030 for (PHINode *LCPHI : OriginalFC0PHIs) {

2031 int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);

2032 assert(L1LatchBBIdx >= 0 &&

2033 "Expected loop carried value to be rewired at this point!");

2034

2035 Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);

2036

2037 PHINode *L1HeaderPHI =

2040 L1HeaderPHI->addIncoming(LCV, FC0.Latch);

2042 FC0.ExitingBlock);

2043

2044 LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);

2045 }

2046

2047

2048

2049

2052

2053

2054

2055 simplifyLatchBranch(FC0);

2056

2057

2058

2059 if (FC0.Latch != FC0.ExitingBlock)

2060 TreeUpdates.emplace_back(DominatorTree::UpdateType(

2061 DominatorTree::Insert, FC0.Latch, FC1.Header));

2062

2063 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,

2064 FC0.Latch, FC0.Header));

2065 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,

2066 FC1.Latch, FC0.Header));

2067 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,

2068 FC1.Latch, FC1.Header));

2069

2070

2071

2072

2073 assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");

2074 assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");

2075

2076

2077 DTU.applyUpdates(TreeUpdates);

2078

2079 LI.removeBlock(FC1GuardBlock);

2080 LI.removeBlock(FC1.Preheader);

2081 LI.removeBlock(FC0.ExitBlock);

2082 if (FC0.Peeled) {

2083 LI.removeBlock(FC0ExitBlockSuccessor);

2084 DTU.deleteBB(FC0ExitBlockSuccessor);

2085 }

2086 DTU.deleteBB(FC1GuardBlock);

2087 DTU.deleteBB(FC1.Preheader);

2088 DTU.deleteBB(FC0.ExitBlock);

2089 DTU.flush();

2090

2091

2092

2093

2094

2095 SE.forgetLoop(FC1.L);

2096 SE.forgetLoop(FC0.L);

2097

2098

2099

2100 mergeLatch(FC0, FC1);

2101

2102

2103

2104

2105 SE.forgetBlockAndLoopDispositions();

2106

2107

2108 SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());

2109 for (BasicBlock *BB : Blocks) {

2112 if (LI.getLoopFor(BB) != FC1.L)

2113 continue;

2114 LI.changeLoopFor(BB, FC0.L);

2115 }

2117 const auto &ChildLoopIt = FC1.L->begin();

2118 Loop *ChildLoop = *ChildLoopIt;

2121 }

2122

2123

2124 LI.erase(FC1.L);

2125

2126#ifndef NDEBUG

2128 assert(DT.verify(DominatorTree::VerificationLevel::Fast));

2129 assert(PDT.verify());

2130 LI.verify(DT);

2131 SE.verify();

2132#endif

2133

2135

2136 return FC0.L;

2137 }

2138};

2139}

2140

2151

2152

2153

2154

2156 for (auto &L : LI) {

2158 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false );

2159 }

2161 PDT.recalculate(F);

2162

2163 LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);

2167

2173 return PA;

2174}

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static bool reportInvalidCandidate(const Instruction &I, llvm::Statistic &Stat)

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

static void printFusionCandidates(const FusionCandidateCollection &FusionCandidates)

Definition LoopFuse.cpp:491

static cl::opt< FusionDependenceAnalysisChoice > FusionDependenceAnalysis("loop-fusion-dependence-analysis", cl::desc("Which dependence analysis should loop fusion use?"), cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev", "Use the scalar evolution interface"), clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da", "Use the dependence analysis interface"), clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all", "Use all available analyses")), cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL))

static void printLoopVector(const LoopVector &LV)

Definition LoopFuse.cpp:466

std::set< FusionCandidate, FusionCandidateCompare > FusionCandidateSet

Definition LoopFuse.cpp:462

SmallVector< Loop *, 4 > LoopVector

Definition LoopFuse.cpp:450

SmallVector< FusionCandidateSet, 4 > FusionCandidateCollection

Definition LoopFuse.cpp:463

FusionDependenceAnalysisChoice

Definition LoopFuse.cpp:105

@ FUSION_DEPENDENCE_ANALYSIS_DA

Definition LoopFuse.cpp:107

@ FUSION_DEPENDENCE_ANALYSIS_ALL

Definition LoopFuse.cpp:108

@ FUSION_DEPENDENCE_ANALYSIS_SCEV

Definition LoopFuse.cpp:106

static cl::opt< bool > VerboseFusionDebugging("loop-fusion-verbose-debug", cl::desc("Enable verbose debugging for Loop Fusion"), cl::Hidden, cl::init(false))

static cl::opt< unsigned > FusionPeelMaxCount("loop-fusion-peel-max-count", cl::init(0), cl::Hidden, cl::desc("Max number of iterations to be peeled from a loop, such that " "fusion can take place"))

#define DEBUG_TYPE

Definition LoopFuse.cpp:70

This file implements the Loop Fusion pass.

Loop::LoopBounds::Direction Direction

static bool isValid(const char C)

Returns true if C is a valid mangled character: <0-9a-zA-Z_>.

void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This pass exposes codegen information to IR-level passes.

Virtual Register Rewriter

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

A function analysis which provides an AssumptionCache.

LLVM Basic Block Representation.

LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)

Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...

iterator begin()

Instruction iterator methods.

iterator_range< const_phi_iterator > phis() const

Returns a range that iterates over the phis in the basic block.

LLVM_ABI const_iterator getFirstInsertionPt() const

Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const

Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...

LLVM_ABI const BasicBlock * getUniqueSuccessor() const

Return the successor of this block if it has a unique successor.

const Instruction & front() const

LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)

Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.

LLVM_ABI const BasicBlock * getSingleSuccessor() const

Return the successor of this block if it has a single successor.

InstListType::iterator iterator

Instruction iterators...

LLVM_ABI LLVMContext & getContext() const

Get the context in which this basic block lives.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Conditional or Unconditional Branch instruction.

bool isConditional() const

static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)

BasicBlock * getSuccessor(unsigned i) const

Value * getCondition() const

A parsed version of the target data layout string in and methods for querying it.

AnalysisPass to compute dependence information in a function.

unsigned getLevel() const

Analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

LLVM_ABI void insertBefore(InstListType::iterator InsertPos)

Insert an unlinked instruction into a basic block immediately before the specified position.

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

Analysis pass that exposes the LoopInfo for a function.

bool contains(const LoopT *L) const

Return true if the specified loop is contained within in this loop.

bool isInnermost() const

Return true if the loop does not contain any (natural) loops.

void removeBlockFromLoop(BlockT *BB)

This removes the specified basic block from the current loop, updating the Blocks as appropriate.

BlockT * getHeader() const

unsigned getLoopDepth() const

Return the nesting level of this loop.

iterator_range< block_iterator > blocks() const

void addChildLoop(LoopT *NewChild)

Add the specified loop to be a child of this loop.

void addBlockEntry(BlockT *BB)

This adds a basic block directly to the basic block list.

LoopT * removeChildLoop(iterator I)

This removes the specified child from being a subloop of this loop.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Definition LoopFuse.cpp:2141

reverse_iterator rend() const

reverse_iterator rbegin() const

Represents a single loop in the control flow graph.

DebugLoc getStartLoc() const

Return the debug location of the start of this loop.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

Analysis pass which computes a PostDominatorTree.

PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserve()

Mark an analysis as preserved.

const SCEV * getStart() const

const SCEV * getStepRecurrence(ScalarEvolution &SE) const

Constructs and returns the recurrence indicating how much this expression steps by.

bool isAffine() const

Return true if this represents an expression A + B*x where A and B are loop invariant values.

const Loop * getLoop() const

NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const

ArrayRef< const SCEV * > operands() const

Analysis pass that exposes the ScalarEvolution for a function.

The main scalar evolution driver.

LLVM_ABI bool isKnownPositive(const SCEV *S)

Test if the given expression is known to be positive.

LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)

Get an add recurrence expression for the specified loop.

LLVM_ABI bool hasLoopInvariantBackedgeTakenCount(const Loop *L)

Return true if the specified loop has an analyzable loop-invariant backedge-taken count.

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Analysis pass providing the TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)

Replace uses of one Value with another.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

const ParentTy * getParent() const

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ BasicBlock

Various leaf nodes.

@ Valid

The data is already valid.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

Add a small namespace to avoid name clashes with the classes used in the streaming interface.

DiagnosticInfoOptimizationBase::Argument NV

NodeAddr< DefNode * > Def

NodeAddr< PhiNode * > Phi

LLVM_ABI iterator begin() const

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

Simplify each loop in a loop nest recursively.

LLVM_ABI void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)

Replace the instruction specified by BI with the instruction specified by I.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

FunctionAddr VTableAddr Value

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

bool succ_empty(const Instruction *I)

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool verifyFunction(const Function &F, raw_ostream *OS=nullptr)

Check a function for errors, useful for use when debugging a pass.

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

bool canPeel(const Loop *L)

LLVM_ABI void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI)

Move instructions, in an order-preserving manner, from FromBB to the end of ToBB when proven safe.

DomTreeNodeBase< BasicBlock > DomTreeNode

auto reverse(ContainerTy &&C)

TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI bool isControlFlowEquivalent(const Instruction &I0, const Instruction &I1, const DominatorTree &DT, const PostDominatorTree &PDT)

Return true if I0 and I1 are control flow equivalent.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

LLVM_ABI bool nonStrictlyPostDominate(const BasicBlock *ThisBlock, const BasicBlock *OtherBlock, const DominatorTree *DT, const PostDominatorTree *PDT)

In case that two BBs ThisBlock and OtherBlock are control flow equivalent but they do not strictly do...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

LLVM_ABI void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI)

Move instructions, in an order-preserving manner, from FromBB to the beginning of ToBB when proven sa...

LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)

Attempts to merge a block into its predecessor, if possible.

LLVM_ABI void printLoop(const Loop &L, raw_ostream &OS, const std::string &Banner="")

Function to print a loop's contents as LLVM's text IR assembly.

DWARFExpression::Operation Op

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

ArrayRef(const T &OneElt) -> ArrayRef< T >

ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy

auto predecessors(const MachineBasicBlock *BB)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

bool pred_empty(const BasicBlock *BB)

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

LLVM_ABI bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, DominatorTree &DT, const PostDominatorTree *PDT=nullptr, DependenceInfo *DI=nullptr, bool CheckForEntireBlock=false)

Return true if I can be safely moved before InsertPoint.

bool peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)

VMap is the value-map that maps instructions from the original loop to instructions in the last peele...

bool SCEVExprContains(const SCEV *Root, PredTy Pred)

Return true if any node in Root satisfies the predicate Pred.

unsigned PeelCount

A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...