LLVM: lib/CodeGen/MachineBlockPlacement.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

67#include

68#include

69#include

70#include

71#include

72#include

73#include

74#include

75#include

76

77using namespace llvm;

78

79#define DEBUG_TYPE "block-placement"

80

81STATISTIC(NumCondBranches, "Number of conditional branches");

82STATISTIC(NumUncondBranches, "Number of unconditional branches");

84 "Potential frequency of taking conditional branches");

86 "Potential frequency of taking unconditional branches");

87

89 "align-all-blocks",

90 cl::desc("Force the alignment of all blocks in the function in log2 format "

91 "(e.g 4 means align on 16B boundaries)."),

93

95 "align-all-nofallthru-blocks",

96 cl::desc("Force the alignment of all blocks that have no fall-through "

97 "predecessors (i.e. don't add nops that are executed). In log2 "

98 "format (e.g 4 means align on 16B boundaries)."),

100

102 "max-bytes-for-alignment",

103 cl::desc("Forces the maximum bytes allowed to be emitted when padding for "

104 "alignment"),

106

108 "block-placement-predecessor-limit",

109 cl::desc("For blocks with more predecessors, certain layout optimizations"

110 "will be disabled to prevent quadratic compile time."),

112

113

115 "block-placement-exit-block-bias",

116 cl::desc("Block frequency percentage a loop exit block needs "

117 "over the original exit to be considered the new exit."),

119

120

121

122

124 "loop-to-cold-block-ratio",

125 cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "

126 "(frequency of block) is greater than this ratio"),

128

131 cl::desc("Force outlining cold blocks from loops."),

133

136 cl::desc("Model the cost of loop rotation more "

137 "precisely by using profile data."),

139

142 cl::desc("Force the use of precise cost "

143 "loop rotation strategy."),

145

147 "misfetch-cost",

148 cl::desc("Cost that models the probabilistic risk of an instruction "

149 "misfetch due to a jump comparing to falling through, whose cost "

150 "is zero."),

152

154 cl::desc("Cost of jump instructions."),

158 cl::desc("Perform tail duplication during placement. "

159 "Creates more fallthrough opportunities in "

160 "outline branches."),

162

165 cl::desc("Perform branch folding during placement. "

166 "Reduces code size."),

168

169

171 "tail-dup-placement-threshold",

172 cl::desc("Instruction cutoff for tail duplication during layout. "

173 "Tail merging during layout is forced to have a threshold "

174 "that won't conflict."),

176

177

179 "tail-dup-placement-aggressive-threshold",

180 cl::desc("Instruction cutoff for aggressive tail duplication during "

181 "layout. Used at -O3. Tail merging during layout is forced to "

182 "have a threshold that won't conflict."),

184

185

187 "tail-dup-placement-penalty",

189 "Cost penalty for blocks that can avoid breaking CFG by copying. "

190 "Copying can increase fallthrough, but it also increases icache "

191 "pressure. This parameter controls the penalty to account for that. "

192 "Percent as integer."),

194

195

197 "tail-dup-profile-percent-threshold",

198 cl::desc("If profile count information is used in tail duplication cost "

199 "model, the gained fall through number from tail duplication "

200 "should be at least this percent of hot count."),

202

203

205 "triangle-chain-count",

206 cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "

207 "triangle tail duplication heuristic to kick in. 0 to disable."),

209

210

211

212

213

214

216 "renumber-blocks-before-view",

218 "If true, basic blocks are re-numbered before MBP layout is printed "

219 "into a dot graph. Only used when a function is being printed."),

221

223 "ext-tsp-block-placement-max-blocks",

224 cl::desc("Maximum number of basic blocks in a function to run ext-TSP "

225 "block placement."),

227

228

231 cl::desc("Use ext-tsp for size-aware block placement."));

232

233namespace llvm {

238

239

240

241

243

244

245

247}

248

249namespace {

250

251class BlockChain;

252

253

255

256

257

258

259

260

261

262

263

264

265

266

267class BlockChain {

268

269

270

271

273

274

275

276

277

278

279

280 BlockToChainMapType &BlockToChain;

281

282public:

283

284

285

286

287

288 BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)

289 : Blocks(1, BB), BlockToChain(BlockToChain) {

290 assert(BB && "Cannot create a chain with a null basic block");

291 BlockToChain[BB] = this;

292 }

293

294

297

298

301

302

305

307 for (iterator i = begin(); i != end(); ++i) {

308 if (*i == BB) {

310 return true;

311 }

312 }

313 return false;

314 }

315

316

317

318

319

320

321

323 assert(BB && "Can't merge a null block.");

324 assert(!Blocks.empty() && "Can't merge into an empty chain.");

325

326

327 if (!Chain) {

328 assert(!BlockToChain[BB] &&

329 "Passed chain is null, but BB has entry in BlockToChain.");

331 BlockToChain[BB] = this;

332 return;

333 }

334

335 assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");

336 assert(Chain->begin() != Chain->end());

337

338

339

342 assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");

343 BlockToChain[ChainBB] = this;

344 }

345 }

346

347#ifndef NDEBUG

348

351 MBB->dump();

352 }

353#endif

354

355

356

357

358

359

360

361

362

363

364 unsigned UnscheduledPredecessors = 0;

365};

366

367class MachineBlockPlacement {

368

369 using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;

370

371

372 struct BlockAndTailDupResult {

373 MachineBasicBlock *BB = nullptr;

374 bool ShouldTailDup;

375 };

376

377

378 struct WeightedEdge {

379 BlockFrequency Weight;

380 MachineBasicBlock *Src = nullptr;

381 MachineBasicBlock *Dest = nullptr;

382 };

383

384

387

388

389 DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;

390

391

392 MachineFunction *F = nullptr;

393

394

395 const MachineBranchProbabilityInfo *MBPI = nullptr;

396

397

398 std::unique_ptr MBFI;

399

400

401 MachineLoopInfo *MLI = nullptr;

402

403

404

405

406 MachineBasicBlock *PreferredLoopExit = nullptr;

407

408

409 const TargetInstrInfo *TII = nullptr;

410

411

412 const TargetLoweringBase *TLI = nullptr;

413

414

415 MachinePostDominatorTree *MPDT = nullptr;

416

417 ProfileSummaryInfo *PSI = nullptr;

418

419

420

421 bool AllowTailMerge;

422

424

425

426

427

428

429

430 TailDuplicator TailDup;

431

432

433 BlockFrequency DupThreshold;

434

435 unsigned TailDupSize;

436

437

438

439 bool UseProfileCount = false;

440

441

442

443

444

445

446

447

448 SpecificBumpPtrAllocator ChainAllocator;

449

450

451

452

453

454

455

456 DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;

457

458#ifndef NDEBUG

459

460

461

462

463 SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;

464#endif

465

466

467

468 BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {

469 if (UseProfileCount) {

470 auto Count = MBFI->getBlockProfileCount(BB);

472 return BlockFrequency(*Count);

473 else

474 return BlockFrequency(0);

475 } else

476 return MBFI->getBlockFreq(BB);

477 }

478

479

480 BlockFrequency scaleThreshold(MachineBasicBlock *BB);

481 void initTailDupThreshold();

482

483

484

485 void markChainSuccessors(const BlockChain &Chain,

486 const MachineBasicBlock *LoopHeaderBB,

487 const BlockFilterSet *BlockFilter = nullptr);

488

489

490

491 void markBlockSuccessors(const BlockChain &Chain, const MachineBasicBlock *BB,

492 const MachineBasicBlock *LoopHeaderBB,

493 const BlockFilterSet *BlockFilter = nullptr);

494

495 BranchProbability

496 collectViableSuccessors(const MachineBasicBlock *BB, const BlockChain &Chain,

497 const BlockFilterSet *BlockFilter,

498 SmallVector<MachineBasicBlock *, 4> &Successors);

499 bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,

500 BlockFilterSet *BlockFilter);

501 void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,

502 MachineBasicBlock *BB,

503 BlockFilterSet *BlockFilter);

504 bool repeatedlyTailDuplicateBlock(

505 MachineBasicBlock *BB, MachineBasicBlock *&LPred,

506 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,

507 BlockFilterSet *BlockFilter,

510 bool

511 maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,

512 BlockChain &Chain, BlockFilterSet *BlockFilter,

515 bool &DuplicatedToLPred);

516 bool hasBetterLayoutPredecessor(const MachineBasicBlock *BB,

517 const MachineBasicBlock *Succ,

518 const BlockChain &SuccChain,

519 BranchProbability SuccProb,

520 BranchProbability RealSuccProb,

521 const BlockChain &Chain,

522 const BlockFilterSet *BlockFilter);

523 BlockAndTailDupResult selectBestSuccessor(const MachineBasicBlock *BB,

524 const BlockChain &Chain,

525 const BlockFilterSet *BlockFilter);

526 MachineBasicBlock *

527 selectBestCandidateBlock(const BlockChain &Chain,

528 SmallVectorImpl<MachineBasicBlock *> &WorkList);

529 MachineBasicBlock *

530 getFirstUnplacedBlock(const BlockChain &PlacedChain,

532 MachineBasicBlock *

533 getFirstUnplacedBlock(const BlockChain &PlacedChain,

535 const BlockFilterSet *BlockFilter);

536

537

538

539

540

541

542 void fillWorkLists(const MachineBasicBlock *MBB,

543 SmallPtrSetImpl<BlockChain *> &UpdatedPreds,

544 const BlockFilterSet *BlockFilter);

545

546 void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,

547 BlockFilterSet *BlockFilter = nullptr);

548 bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,

549 const MachineBasicBlock *OldTop);

550 bool hasViableTopFallthrough(const MachineBasicBlock *Top,

551 const BlockFilterSet &LoopBlockSet);

552 BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,

553 const BlockFilterSet &LoopBlockSet);

554 BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,

555 const MachineBasicBlock *OldTop,

556 const MachineBasicBlock *ExitBB,

557 const BlockFilterSet &LoopBlockSet);

558 MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,

559 const MachineLoop &L,

560 const BlockFilterSet &LoopBlockSet);

561 MachineBasicBlock *findBestLoopTop(const MachineLoop &L,

562 const BlockFilterSet &LoopBlockSet);

563 MachineBasicBlock *findBestLoopExit(const MachineLoop &L,

564 const BlockFilterSet &LoopBlockSet,

565 BlockFrequency &ExitFreq);

566 BlockFilterSet collectLoopBlockSet(const MachineLoop &L);

567 void buildLoopChains(const MachineLoop &L);

568 void rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,

569 BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);

570 void rotateLoopWithProfile(BlockChain &LoopChain, const MachineLoop &L,

571 const BlockFilterSet &LoopBlockSet);

572 void buildCFGChains();

573 void optimizeBranches();

574 void alignBlocks();

575

576

577 bool shouldTailDuplicate(MachineBasicBlock *BB);

578

579

580 bool isProfitableToTailDup(const MachineBasicBlock *BB,

581 const MachineBasicBlock *Succ,

582 BranchProbability QProb, const BlockChain &Chain,

583 const BlockFilterSet *BlockFilter);

584

585

586 bool isTrellis(const MachineBasicBlock *BB,

587 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,

588 const BlockChain &Chain, const BlockFilterSet *BlockFilter);

589

590

591 BlockAndTailDupResult getBestTrellisSuccessor(

592 const MachineBasicBlock *BB,

593 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,

594 BranchProbability AdjustedSumProb, const BlockChain &Chain,

595 const BlockFilterSet *BlockFilter);

596

597

598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(

599 const MachineBasicBlock *BB,

601

602

603

604 bool canTailDuplicateUnplacedPreds(const MachineBasicBlock *BB,

605 MachineBasicBlock *Succ,

606 const BlockChain &Chain,

607 const BlockFilterSet *BlockFilter);

608

609

610

611 void precomputeTriangleChains();

612

613

614 void applyExtTsp(bool OptForSize);

615

616

617 void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);

618

619

620 void createCFGChainExtTsp();

621

622public:

623 MachineBlockPlacement(const MachineBranchProbabilityInfo *MBPI,

624 MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,

625 std::unique_ptr MBFI,

626 MachinePostDominatorTree *MPDT, bool AllowTailMerge)

627 : MBPI(MBPI), MBFI(std::move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),

628 AllowTailMerge(AllowTailMerge) {};

629

630 bool run(MachineFunction &F);

631

632 static bool allowTailDupPlacement(MachineFunction &MF) {

634 }

635};

636

638public:

639 static char ID;

640

641 MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {

643 }

644

645 bool runOnMachineFunction(MachineFunction &MF) override {

647 return false;

648

649 auto *MBPI =

650 &getAnalysis().getMBPI();

651 auto MBFI = std::make_unique(

652 getAnalysis().getMBFI());

653 auto *MLI = &getAnalysis().getLI();

654 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)

655 ? &getAnalysis()

656 .getPostDomTree()

657 : nullptr;

658 auto *PSI = &getAnalysis().getPSI();

659 auto *PassConfig = &getAnalysis();

660 bool AllowTailMerge = PassConfig->getEnableTailMerge();

661 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,

662 AllowTailMerge)

663 .run(MF);

664 }

665

666 void getAnalysisUsage(AnalysisUsage &AU) const override {

667 AU.addRequired();

668 AU.addRequired();

670 AU.addRequired();

671 AU.addRequired();

672 AU.addRequired();

675 }

676};

677

678}

679

680char MachineBlockPlacementLegacy::ID = 0;

681

683

685 "Branch Probability Basic Block Placement", false, false)

692 "Branch Probability Basic Block Placement", false, false)

693

694#ifndef NDEBUG

695

696

697

699 std::string Result;

702 OS << " ('" << BB->getName() << "')";

704 return Result;

705}

706#endif

707

708

709

710

711

712

713

714void MachineBlockPlacement::markChainSuccessors(

716 const BlockFilterSet *BlockFilter) {

717

718

719 for (MachineBasicBlock *MBB : Chain) {

720 markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);

721 }

722}

723

724

725

726

727

728

729

730void MachineBlockPlacement::markBlockSuccessors(

731 const BlockChain &Chain, const MachineBasicBlock *MBB,

732 const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {

733

734

735

736

737 for (MachineBasicBlock *Succ : MBB->successors()) {

738 if (BlockFilter && !BlockFilter->count(Succ))

739 continue;

740 BlockChain &SuccChain = *BlockToChain[Succ];

741

742 if (&Chain == &SuccChain || Succ == LoopHeaderBB)

743 continue;

744

745

746

747 if (SuccChain.UnscheduledPredecessors == 0 ||

748 --SuccChain.UnscheduledPredecessors > 0)

749 continue;

750

751 auto *NewBB = *SuccChain.begin();

752 if (NewBB->isEHPad())

754 else

756 }

757}

758

759

760

761

762

763BranchProbability MachineBlockPlacement::collectViableSuccessors(

764 const MachineBasicBlock *BB, const BlockChain &Chain,

765 const BlockFilterSet *BlockFilter,

766 SmallVector<MachineBasicBlock *, 4> &Successors) {

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

784 for (MachineBasicBlock *Succ : BB->successors()) {

785 bool SkipSucc = false;

786 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {

787 SkipSucc = true;

788 } else {

789 BlockChain *SuccChain = BlockToChain[Succ];

790 if (SuccChain == &Chain) {

791 SkipSucc = true;

792 } else if (Succ != *SuccChain->begin()) {

794 << " -> Mid chain!\n");

795 continue;

796 }

797 }

798 if (SkipSucc)

800 else

802 }

803

804 return AdjustedSumProb;

805}

806

807

808

809static BranchProbability

815 if (SuccProbN >= SuccProbD)

817 else

819

820 return SuccProb;

821}

822

823

824static bool

828 return false;

829

830 if (Successors.count(&BB))

831 return false;

833 if (!Successors.count(Succ))

834 return false;

835 return true;

836}

837

838

839

840

841bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {

842

843

844

845 bool IsSimple = TailDup.isSimpleBB(BB);

846

848 return false;

850}

851

852

853

854

855

856

857

862 return (Gain / ThresholdProb) >= EntryFreq;

863}

864

865

866

867

868

869

870bool MachineBlockPlacement::isProfitableToTailDup(

871 const MachineBasicBlock *BB, const MachineBasicBlock *Succ,

872 BranchProbability QProb, const BlockChain &Chain,

873 const BlockFilterSet *BlockFilter) {

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897 MachineBasicBlock *PDom = nullptr;

898 SmallVector<MachineBasicBlock *, 4> SuccSuccs;

899

900 auto AdjustedSuccSumProb =

901 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);

903 auto BBFreq = MBFI->getBlockFreq(BB);

904 auto SuccFreq = MBFI->getBlockFreq(Succ);

905 BlockFrequency P = BBFreq * PProb;

906 BlockFrequency Qout = BBFreq * QProb;

907 BlockFrequency EntryFreq = MBFI->getEntryFreq();

908

909

910 if (SuccSuccs.size() == 0)

912

914

915 for (MachineBasicBlock *SuccSucc : SuccSuccs) {

917 if (Prob > BestSuccSucc)

918 BestSuccSucc = Prob;

919 if (PDom == nullptr)

920 if (MPDT->dominates(SuccSucc, Succ)) {

921 PDom = SuccSucc;

922 break;

923 }

924 }

925

926

927 auto SuccBestPred = BlockFrequency(0);

928 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {

929 if (SuccPred == Succ || SuccPred == BB ||

930 BlockToChain[SuccPred] == &Chain ||

931 (BlockFilter && !BlockFilter->count(SuccPred)))

932 continue;

933 auto Freq =

934 MBFI->getBlockFreq(SuccPred) * MBPI->getEdgeProbability(SuccPred, Succ);

935 if (Freq > SuccBestPred)

936 SuccBestPred = Freq;

937 }

938

939 BlockFrequency Qin = SuccBestPred;

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959 if (PDom == nullptr || !Succ->isSuccessor(PDom)) {

960 BranchProbability UProb = BestSuccSucc;

961 BranchProbability VProb = AdjustedSuccSumProb - UProb;

962 BlockFrequency F = SuccFreq - Qin;

963 BlockFrequency V = SuccFreq * VProb;

964 BlockFrequency QinU = std::min(Qin, F) * UProb;

965 BlockFrequency BaseCost = P + V;

966 BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;

968 }

970 BranchProbability VProb = AdjustedSuccSumProb - UProb;

971 BlockFrequency U = SuccFreq * UProb;

972 BlockFrequency V = SuccFreq * VProb;

973 BlockFrequency F = SuccFreq - Qin;

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

1001

1002

1003 if (UProb > AdjustedSuccSumProb / 2 &&

1004 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,

1005 Chain, BlockFilter))

1006

1008 (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),

1009 EntryFreq);

1010

1012 (Qout + std::min(Qin, F) * AdjustedSuccSumProb +

1013 std::max(Qin, F) * UProb),

1014 EntryFreq);

1015}

1016

1017

1018

1019

1020

1021

1022

1023

1024bool MachineBlockPlacement::isTrellis(

1025 const MachineBasicBlock *BB,

1026 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,

1027 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {

1028

1029

1030 if (BB->succ_size() != 2 || ViableSuccs.size() != 2)

1031 return false;

1032

1033 SmallPtrSet<const MachineBasicBlock *, 2> Successors(llvm::from_range,

1035

1036 SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;

1037

1038 for (MachineBasicBlock *Succ : ViableSuccs) {

1039

1040

1042 return false;

1043

1044 int PredCount = 0;

1045 for (auto *SuccPred : Succ->predecessors()) {

1046

1047 if (Successors.count(SuccPred)) {

1048

1049 for (MachineBasicBlock *CheckSucc : SuccPred->successors())

1050 if (!Successors.count(CheckSucc))

1051 return false;

1052 continue;

1053 }

1054 const BlockChain *PredChain = BlockToChain[SuccPred];

1055 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||

1056 PredChain == &Chain || PredChain == BlockToChain[Succ])

1057 continue;

1058 ++PredCount;

1059

1060 if (!SeenPreds.insert(SuccPred).second)

1061 continue;

1063 return false;

1064 }

1065

1066

1067 if (PredCount < 1)

1068 return false;

1069 }

1070 return true;

1071}

1072

1073

1074

1075

1076

1077

1078std::pair<MachineBlockPlacement::WeightedEdge,

1079 MachineBlockPlacement::WeightedEdge>

1080MachineBlockPlacement::getBestNonConflictingEdges(

1081 const MachineBasicBlock *BB,

1083 Edges) {

1084

1085

1086

1087

1088

1089

1090

1091 auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };

1092

1095 auto BestA = Edges[0].begin();

1096 auto BestB = Edges[1].begin();

1097

1098

1099 if (BestA->Src == BestB->Src) {

1100

1101 auto SecondBestA = std::next(BestA);

1102 auto SecondBestB = std::next(BestB);

1103 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;

1104 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;

1105 if (BestAScore < BestBScore)

1106 BestA = SecondBestA;

1107 else

1108 BestB = SecondBestB;

1109 }

1110

1111 if (BestB->Src == BB)

1113 return std::make_pair(*BestA, *BestB);

1114}

1115

1116

1117

1118

1119

1120

1121

1122

1123MachineBlockPlacement::BlockAndTailDupResult

1124MachineBlockPlacement::getBestTrellisSuccessor(

1125 const MachineBasicBlock *BB,

1126 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,

1127 BranchProbability AdjustedSumProb, const BlockChain &Chain,

1128 const BlockFilterSet *BlockFilter) {

1129

1130 BlockAndTailDupResult Result = {nullptr, false};

1131 SmallPtrSet<const MachineBasicBlock *, 4> Successors(llvm::from_range,

1133

1134

1135

1136

1137 if (Successors.size() != 2 || ViableSuccs.size() != 2)

1139

1140

1142 int SuccIndex = 0;

1143 for (auto *Succ : ViableSuccs) {

1144 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {

1145

1146 if (SuccPred != BB) {

1147 if (BlockFilter && !BlockFilter->count(SuccPred))

1148 continue;

1149 const BlockChain *SuccPredChain = BlockToChain[SuccPred];

1150 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])

1151 continue;

1152 }

1153 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *

1155 Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});

1156 }

1157 ++SuccIndex;

1158 }

1159

1160

1161 WeightedEdge BestA, BestB;

1162 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);

1163

1164 if (BestA.Src != BB) {

1165

1166

1167

1168 LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");

1170 }

1171

1172

1173

1174

1175 if (BestA.Dest == BestB.Src) {

1176

1177

1178 MachineBasicBlock *Succ1 = BestA.Dest;

1179 MachineBasicBlock *Succ2 = BestB.Dest;

1180

1181 if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ2) &&

1182 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&

1183 isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),

1184 Chain, BlockFilter)) {

1188 << ", probability: " << Succ2Prob

1189 << " (Tail Duplicate)\n");

1191 Result.ShouldTailDup = true;

1193 }

1194 }

1195

1196

1197 ComputedEdges[BestB.Src] = {BestB.Dest, false};

1198

1199 auto TrellisSucc = BestA.Dest;

1203 << ", probability: " << SuccProb << " (Trellis)\n");

1204 Result.BB = TrellisSucc;

1206}

1207

1208

1209

1210

1211bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(

1212 const MachineBasicBlock *BB, MachineBasicBlock *Succ,

1213 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {

1214 if (!shouldTailDuplicate(Succ))

1215 return false;

1216

1217

1218 bool Duplicate = true;

1219

1220 unsigned int NumDup = 0;

1221

1222

1223 SmallPtrSet<const MachineBasicBlock *, 4> Successors(llvm::from_range,

1225 for (MachineBasicBlock *Pred : Succ->predecessors()) {

1226

1227

1228

1229 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||

1230 (BlockToChain[Pred] == &Chain && !Succ->succ_empty()))

1231 continue;

1234

1235

1236

1237

1238

1239

1240

1241

1242

1243

1244

1245

1246

1247

1248

1249

1250

1251

1252

1253

1254

1255

1256

1257

1258

1259

1260

1261

1262

1263

1264

1265

1266

1267 continue;

1268 Duplicate = false;

1269 continue;

1270 }

1271 NumDup++;

1272 }

1273

1274

1275 if (NumDup == 0)

1276 return false;

1277

1278

1279

1280 if (F->getFunction().hasProfileData())

1281 return true;

1282

1283

1284

1285

1286

1288 return true;

1289

1290

1291 NumDup++;

1292

1293

1294

1295

1296

1297

1298

1299

1300

1301

1302

1303

1304

1305

1306

1307

1308

1309

1310

1311 if ((NumDup > Succ->succ_size()) || !Duplicate)

1312 return false;

1313

1314 return true;

1315}

1316

1317

1318

1319

1320

1321

1322

1323

1324

1325

1326

1327

1328

1329

1330

1331

1332

1333

1334void MachineBlockPlacement::precomputeTriangleChains() {

1335 struct TriangleChain {

1336 std::vector<MachineBasicBlock *> Edges;

1337

1338 TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)

1339 : Edges({src, dst}) {}

1340

1341 void append(MachineBasicBlock *dst) {

1342 assert(getKey()->isSuccessor(dst) &&

1343 "Attempting to append a block that is not a successor.");

1344 Edges.push_back(dst);

1345 }

1346

1347 unsigned count() const { return Edges.size() - 1; }

1348

1349 MachineBasicBlock *getKey() const { return Edges.back(); }

1350 };

1351

1353 return;

1354

1355 LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");

1356

1357

1358 DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;

1359 for (MachineBasicBlock &BB : *F) {

1360

1362 continue;

1363 MachineBasicBlock *PDom = nullptr;

1364 for (MachineBasicBlock *Succ : BB.successors()) {

1365 if (!MPDT->dominates(Succ, &BB))

1366 continue;

1367 PDom = Succ;

1368 break;

1369 }

1370

1371

1372 if (PDom == nullptr)

1373 continue;

1374

1375 if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))

1376 continue;

1377

1378

1379 if (!shouldTailDuplicate(PDom))

1380 continue;

1381 bool CanTailDuplicate = true;

1382

1383

1384 for (MachineBasicBlock *Pred : PDom->predecessors()) {

1385 if (Pred == &BB)

1386 continue;

1388 CanTailDuplicate = false;

1389 break;

1390 }

1391 }

1392

1393

1394 if (!CanTailDuplicate)

1395 continue;

1396

1397

1398

1399

1400

1401 auto Found = TriangleChainMap.find(&BB);

1402

1403

1404 if (Found != TriangleChainMap.end()) {

1405 TriangleChain Chain = std::move(Found->second);

1406 TriangleChainMap.erase(Found);

1407 Chain.append(PDom);

1408 TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));

1409 } else {

1410 auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);

1411 assert(InsertResult.second && "Block seen twice.");

1412 (void)InsertResult;

1413 }

1414 }

1415

1416

1417

1418

1419 for (auto &ChainPair : TriangleChainMap) {

1420 TriangleChain &Chain = ChainPair.second;

1421

1422

1423

1425 continue;

1426 MachineBasicBlock *dst = Chain.Edges.back();

1427 Chain.Edges.pop_back();

1428 for (MachineBasicBlock *src : reverse(Chain.Edges)) {

1431 << " as pre-computed based on triangles.\n");

1432

1433 auto InsertResult = ComputedEdges.insert({src, {dst, true}});

1434 assert(InsertResult.second && "Block seen twice.");

1435 (void)InsertResult;

1436

1437 dst = src;

1438 }

1439 }

1440}

1441

1442

1443

1444static BranchProbability

1452

1453

1454

1455

1456

1457

1458

1459

1460

1461

1463 }

1464 }

1466}

1467

1468

1469

1470

1471

1472

1473

1474

1475

1476bool MachineBlockPlacement::hasBetterLayoutPredecessor(

1477 const MachineBasicBlock *BB, const MachineBasicBlock *Succ,

1478 const BlockChain &SuccChain, BranchProbability SuccProb,

1479 BranchProbability RealSuccProb, const BlockChain &Chain,

1480 const BlockFilterSet *BlockFilter) {

1481

1482

1483 if (SuccChain.UnscheduledPredecessors == 0)

1484 return false;

1485

1486

1487

1489 return false;

1490

1491

1492

1493

1494

1495

1496

1497

1498

1499

1500

1501

1502

1503

1504

1505

1506

1507

1508

1509

1510

1511

1512

1513

1514

1515

1516

1517

1518

1519

1520

1521

1522

1523

1524

1525

1526

1527

1528

1529

1530

1531

1532

1533

1534

1535

1536

1537

1538

1539

1540

1541

1542

1543

1544

1545

1546

1547

1548

1549

1550

1551

1552

1553

1554

1555

1556

1557

1558

1559

1560

1561

1562

1563

1564

1565

1566

1567

1568

1569

1570

1571

1572

1573

1574

1575

1576

1577

1578

1579

1580

1581

1582

1583

1584

1585

1586

1587

1588

1589

1590

1591

1592

1593

1594

1595

1596

1597

1598

1599

1600

1601

1602

1603

1605

1606

1607

1608 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;

1609 bool BadCFGConflict = false;

1610

1611 for (MachineBasicBlock *Pred : Succ->predecessors()) {

1612 BlockChain *PredChain = BlockToChain[Pred];

1613 if (Pred == Succ || PredChain == &SuccChain ||

1614 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||

1615 Pred != *std::prev(PredChain->end()) ||

1616

1617

1618

1619 (Pred == BB))

1620 continue;

1621

1622

1623

1624

1625

1626

1627

1628

1629

1630

1631

1632

1633

1634 BlockFrequency PredEdgeFreq =

1636 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {

1637 BadCFGConflict = true;

1638 break;

1639 }

1640 }

1641

1642 if (BadCFGConflict) {

1644 << SuccProb << " (prob) (non-cold CFG conflict)\n");

1645 return true;

1646 }

1647

1648 return false;

1649}

1650

1651

1652

1653

1654

1655

1656

1657

1658

1659

1660

1661MachineBlockPlacement::BlockAndTailDupResult

1662MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,

1663 const BlockChain &Chain,

1664 const BlockFilterSet *BlockFilter) {

1666

1667 BlockAndTailDupResult BestSucc = {nullptr, false};

1669

1670 SmallVector<MachineBasicBlock *, 4> Successors;

1671 auto AdjustedSumProb =

1672 collectViableSuccessors(BB, Chain, BlockFilter, Successors);

1673

1675 << "\n");

1676

1677

1678

1679 auto FoundEdge = ComputedEdges.find(BB);

1680 if (FoundEdge != ComputedEdges.end()) {

1681 MachineBasicBlock *Succ = FoundEdge->second.BB;

1682 ComputedEdges.erase(FoundEdge);

1683 BlockChain *SuccChain = BlockToChain[Succ];

1684 if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&

1685 SuccChain != &Chain && Succ == *SuccChain->begin())

1686 return FoundEdge->second;

1687 }

1688

1689

1690

1691 if (isTrellis(BB, Successors, Chain, BlockFilter))

1692 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,

1693 BlockFilter);

1694

1695

1696

1697

1699 DupCandidates;

1700 for (MachineBasicBlock *Succ : Successors) {

1702 BranchProbability SuccProb =

1704

1705 BlockChain &SuccChain = *BlockToChain[Succ];

1706

1707

1708 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,

1709 Chain, BlockFilter)) {

1710

1711 if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ))

1713 continue;

1714 }

1715

1718 << ", probability: " << SuccProb

1719 << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")

1720 << "\n");

1721

1722 if (BestSucc.BB && BestProb >= SuccProb) {

1723 LLVM_DEBUG(dbgs() << " Not the best candidate, continuing\n");

1724 continue;

1725 }

1726

1727 LLVM_DEBUG(dbgs() << " Setting it as best candidate\n");

1728 BestSucc.BB = Succ;

1729 BestProb = SuccProb;

1730 }

1731

1732

1733

1734

1735

1737 [](std::tuple<BranchProbability, MachineBasicBlock *> L,

1738 std::tuple<BranchProbability, MachineBasicBlock *> R) {

1739 return std::get<0>(L) > std::get<0>(R);

1740 });

1741 for (auto &Tup : DupCandidates) {

1742 BranchProbability DupProb;

1743 MachineBasicBlock *Succ;

1744 std::tie(DupProb, Succ) = Tup;

1745 if (DupProb < BestProb)

1746 break;

1747 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&

1748 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {

1750 << ", probability: " << DupProb

1751 << " (Tail Duplicate)\n");

1752 BestSucc.BB = Succ;

1753 BestSucc.ShouldTailDup = true;

1754 break;

1755 }

1756 }

1757

1758 if (BestSucc.BB)

1760

1761 return BestSucc;

1762}

1763

1764

1765

1766

1767

1768

1769

1770

1771

1772

1773

1774MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(

1775 const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {

1776

1777

1778

1779

1780 llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) {

1781 return BlockToChain.lookup(BB) == &Chain;

1782 });

1783

1784 if (WorkList.empty())

1785 return nullptr;

1786

1787 bool IsEHPad = WorkList[0]->isEHPad();

1788

1789 MachineBasicBlock *BestBlock = nullptr;

1790 BlockFrequency BestFreq;

1791 for (MachineBasicBlock *MBB : WorkList) {

1793 "EHPad mismatch between block and work list.");

1794

1795 BlockChain &SuccChain = *BlockToChain[MBB];

1796 if (&SuccChain == &Chain)

1797 continue;

1798

1799 assert(SuccChain.UnscheduledPredecessors == 0 &&

1800 "Found CFG-violating block");

1801

1802 BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);

1805 << " (freq)\n");

1806

1807

1808

1809

1810

1811

1812

1813

1814

1815

1816

1817

1818

1819

1820

1821

1822

1823

1824

1825 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))

1826 continue;

1827

1828 BestBlock = MBB;

1829 BestFreq = CandidateFreq;

1830 }

1831

1832 return BestBlock;

1833}

1834

1835

1836

1837

1838

1839

1840

1841

1842MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(

1843 const BlockChain &PlacedChain,

1845

1847 ++I) {

1848 if (BlockChain *Chain = BlockToChain[&*I]; Chain != &PlacedChain) {

1849 PrevUnplacedBlockIt = I;

1850

1851

1852

1853 return *Chain->begin();

1854 }

1855 }

1856 return nullptr;

1857}

1858

1859

1860

1861

1862

1863

1864

1865

1866

1867

1868

1869MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(

1870 const BlockChain &PlacedChain,

1871 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,

1872 const BlockFilterSet *BlockFilter) {

1873 assert(BlockFilter);

1874 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();

1875 ++PrevUnplacedBlockInFilterIt) {

1876 BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];

1877 if (C != &PlacedChain) {

1878 return *C->begin();

1879 }

1880 }

1881 return nullptr;

1882}

1883

1884void MachineBlockPlacement::fillWorkLists(

1885 const MachineBasicBlock *MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,

1886 const BlockFilterSet *BlockFilter = nullptr) {

1887 BlockChain &Chain = *BlockToChain[MBB];

1888 if (!UpdatedPreds.insert(&Chain).second)

1889 return;

1890

1892 Chain.UnscheduledPredecessors == 0 &&

1893 "Attempting to place block with unscheduled predecessors in worklist.");

1894 for (MachineBasicBlock *ChainBB : Chain) {

1895 assert(BlockToChain[ChainBB] == &Chain &&

1896 "Block in chain doesn't match BlockToChain map.");

1897 for (MachineBasicBlock *Pred : ChainBB->predecessors()) {

1898 if (BlockFilter && !BlockFilter->count(Pred))

1899 continue;

1900 if (BlockToChain[Pred] == &Chain)

1901 continue;

1902 ++Chain.UnscheduledPredecessors;

1903 }

1904 }

1905

1906 if (Chain.UnscheduledPredecessors != 0)

1907 return;

1908

1909 MachineBasicBlock *BB = *Chain.begin();

1912 else

1914}

1915

1916void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,

1917 BlockChain &Chain,

1918 BlockFilterSet *BlockFilter) {

1919 assert(HeadBB && "BB must not be null.\n");

1920 assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");

1922 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;

1923 if (BlockFilter)

1924 PrevUnplacedBlockInFilterIt = BlockFilter->begin();

1925

1926 const MachineBasicBlock *LoopHeaderBB = HeadBB;

1927 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);

1928 MachineBasicBlock *BB = *std::prev(Chain.end());

1929 while (true) {

1930 assert(BB && "null block found at end of chain in loop.");

1931 assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");

1932 assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");

1933

1934

1935

1936 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);

1937 MachineBasicBlock *BestSucc = Result.BB;

1938 bool ShouldTailDup = Result.ShouldTailDup;

1939 if (allowTailDupPlacement(*F))

1940 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(

1941 BB, BestSucc, Chain, BlockFilter));

1942

1943

1944

1945

1946 if (!BestSucc)

1947 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);

1948 if (!BestSucc)

1949 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);

1950

1951 if (!BestSucc) {

1952 if (BlockFilter)

1953 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,

1954 BlockFilter);

1955 else

1956 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);

1957 if (!BestSucc)

1958 break;

1959

1960 LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "

1961 "layout successor until the CFG reduces\n");

1962 }

1963

1964

1965

1966 if (allowTailDupPlacement(*F) && BestSucc && ShouldTailDup) {

1967 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,

1968 BlockFilter, PrevUnplacedBlockIt,

1969 PrevUnplacedBlockInFilterIt);

1970

1971

1973 continue;

1974 }

1975

1976

1977 BlockChain &SuccChain = *BlockToChain[BestSucc];

1978

1979

1980 SuccChain.UnscheduledPredecessors = 0;

1983 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);

1984 Chain.merge(BestSucc, &SuccChain);

1985 BB = *std::prev(Chain.end());

1986 }

1987

1988 LLVM_DEBUG(dbgs() << "Finished forming chain for header block "

1990}

1991

1992

1993

1994

1995

1996

1997

1998

1999

2000

2001

2002

2003

2004

2005

2006bool MachineBlockPlacement::canMoveBottomBlockToTop(

2007 const MachineBasicBlock *BottomBlock, const MachineBasicBlock *OldTop) {

2008 if (BottomBlock->pred_size() != 1)

2009 return true;

2010 MachineBasicBlock *Pred = *BottomBlock->pred_begin();

2012 return true;

2013

2014 MachineBasicBlock *OtherBB = *Pred->succ_begin();

2015 if (OtherBB == BottomBlock)

2017 if (OtherBB == OldTop)

2018 return false;

2019

2020 return true;

2021}

2022

2023

2024BlockFrequency

2025MachineBlockPlacement::TopFallThroughFreq(const MachineBasicBlock *Top,

2026 const BlockFilterSet &LoopBlockSet) {

2027 BlockFrequency MaxFreq = BlockFrequency(0);

2028 for (MachineBasicBlock *Pred : Top->predecessors()) {

2029 BlockChain *PredChain = BlockToChain[Pred];

2030 if (!LoopBlockSet.count(Pred) &&

2031 (!PredChain || Pred == *std::prev(PredChain->end()))) {

2032

2033

2035 bool TopOK = true;

2036 for (MachineBasicBlock *Succ : Pred->successors()) {

2038 BlockChain *SuccChain = BlockToChain[Succ];

2039

2040

2041 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&

2042 (!SuccChain || Succ == *SuccChain->begin())) {

2043 TopOK = false;

2044 break;

2045 }

2046 }

2047 if (TopOK) {

2048 BlockFrequency EdgeFreq =

2050 if (EdgeFreq > MaxFreq)

2051 MaxFreq = EdgeFreq;

2052 }

2053 }

2054 }

2055 return MaxFreq;

2056}

2057

2058

2059

2060

2061

2062

2063

2064

2065

2066

2067

2068

2069

2070

2071

2072

2073

2074

2075

2076

2077

2078

2079BlockFrequency MachineBlockPlacement::FallThroughGains(

2080 const MachineBasicBlock *NewTop, const MachineBasicBlock *OldTop,

2081 const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) {

2082 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);

2083 BlockFrequency FallThrough2Exit = BlockFrequency(0);

2084 if (ExitBB)

2085 FallThrough2Exit =

2086 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, ExitBB);

2087 BlockFrequency BackEdgeFreq =

2088 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, OldTop);

2089

2090

2091 MachineBasicBlock *BestPred = nullptr;

2092 BlockFrequency FallThroughFromPred = BlockFrequency(0);

2093 for (MachineBasicBlock *Pred : NewTop->predecessors()) {

2094 if (!LoopBlockSet.count(Pred))

2095 continue;

2096 BlockChain *PredChain = BlockToChain[Pred];

2097 if (!PredChain || Pred == *std::prev(PredChain->end())) {

2098 BlockFrequency EdgeFreq =

2100 if (EdgeFreq > FallThroughFromPred) {

2101 FallThroughFromPred = EdgeFreq;

2102 BestPred = Pred;

2103 }

2104 }

2105 }

2106

2107

2108

2109 BlockFrequency NewFreq = BlockFrequency(0);

2110 if (BestPred) {

2111 for (MachineBasicBlock *Succ : BestPred->successors()) {

2112 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))

2113 continue;

2114 if (ComputedEdges.contains(Succ))

2115 continue;

2116 BlockChain *SuccChain = BlockToChain[Succ];

2117 if ((SuccChain && (Succ != *SuccChain->begin())) ||

2118 (SuccChain == BlockToChain[BestPred]))

2119 continue;

2120 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *

2122 if (EdgeFreq > NewFreq)

2123 NewFreq = EdgeFreq;

2124 }

2125 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *

2127 if (NewFreq > OrigEdgeFreq) {

2128

2129

2130

2131 NewFreq = BlockFrequency(0);

2132 FallThroughFromPred = BlockFrequency(0);

2133 }

2134 }

2135

2136 BlockFrequency Result = BlockFrequency(0);

2137 BlockFrequency Gains = BackEdgeFreq + NewFreq;

2138 BlockFrequency Lost =

2139 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;

2140 if (Gains > Lost)

2141 Result = Gains - Lost;

2143}

2144

2145

2146

2147

2148

2149

2150

2151

2152

2153

2154

2155

2156

2157

2158

2159

2160

2161

2162

2163

2164

2165

2166

2167MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(

2168 MachineBasicBlock *OldTop, const MachineLoop &L,

2169 const BlockFilterSet &LoopBlockSet) {

2170

2171

2172

2173 BlockChain &HeaderChain = *BlockToChain[OldTop];

2174 if (!LoopBlockSet.count(*HeaderChain.begin()))

2175 return OldTop;

2176 if (OldTop != *HeaderChain.begin())

2177 return OldTop;

2178

2180 << "\n");

2181

2182 BlockFrequency BestGains = BlockFrequency(0);

2183 MachineBasicBlock *BestPred = nullptr;

2184 for (MachineBasicBlock *Pred : OldTop->predecessors()) {

2185 if (!LoopBlockSet.count(Pred))

2186 continue;

2187 if (Pred == L.getHeader())

2188 continue;

2190 << Pred->succ_size() << " successors, "

2191 << printBlockFreq(MBFI->getMBFI(), *Pred) << " freq\n");

2193 continue;

2194

2195 MachineBasicBlock *OtherBB = nullptr;

2198 if (OtherBB == OldTop)

2200 }

2201

2202 if (!canMoveBottomBlockToTop(Pred, OldTop))

2203 continue;

2204

2205 BlockFrequency Gains =

2206 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);

2207 if ((Gains > BlockFrequency(0)) &&

2208 (Gains > BestGains ||

2210 BestPred = Pred;

2211 BestGains = Gains;

2212 }

2213 }

2214

2215

2216 if (!BestPred) {

2218 return OldTop;

2219 }

2220

2221

2222 while (BestPred->pred_size() == 1 &&

2223 (*BestPred->pred_begin())->succ_size() == 1 &&

2224 *BestPred->pred_begin() != L.getHeader())

2226

2228 return BestPred;

2229}

2230

2231

2232

2233

2234

2235MachineBasicBlock *

2236MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,

2237 const BlockFilterSet &LoopBlockSet) {

2238

2239

2240

2241

2242

2243

2244

2246 return L.getHeader();

2247

2248 MachineBasicBlock *OldTop = nullptr;

2249 MachineBasicBlock *NewTop = L.getHeader();

2250 while (NewTop != OldTop) {

2251 OldTop = NewTop;

2252 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);

2253 if (NewTop != OldTop)

2254 ComputedEdges[NewTop] = {OldTop, false};

2255 }

2256 return NewTop;

2257}

2258

2259

2260

2261

2262

2263

2264MachineBasicBlock *

2265MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,

2266 const BlockFilterSet &LoopBlockSet,

2267 BlockFrequency &ExitFreq) {

2268

2269

2270

2271

2272

2273

2274

2275

2276 BlockChain &HeaderChain = *BlockToChain[L.getHeader()];

2277 if (!LoopBlockSet.count(*HeaderChain.begin()))

2278 return nullptr;

2279

2280 BlockFrequency BestExitEdgeFreq;

2281 unsigned BestExitLoopDepth = 0;

2282 MachineBasicBlock *ExitingBB = nullptr;

2283

2284

2285

2286 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;

2287

2290 for (MachineBasicBlock *MBB : L.getBlocks()) {

2291 BlockChain &Chain = *BlockToChain[MBB];

2292

2293

2294 if (MBB != *std::prev(Chain.end()))

2295 continue;

2296

2297

2298

2299

2300

2301 MachineBasicBlock *OldExitingBB = ExitingBB;

2302 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;

2303 bool HasLoopingSucc = false;

2304 for (MachineBasicBlock *Succ : MBB->successors()) {

2306 continue;

2307 if (Succ == MBB)

2308 continue;

2309 BlockChain &SuccChain = *BlockToChain[Succ];

2310

2311 if (&Chain == &SuccChain) {

2313 << getBlockName(Succ) << " (chain conflict)\n");

2314 continue;

2315 }

2316

2318 if (LoopBlockSet.count(Succ)) {

2320 << getBlockName(Succ) << " (" << SuccProb << ")\n");

2321 HasLoopingSucc = true;

2322 continue;

2323 }

2324

2325 unsigned SuccLoopDepth = 0;

2326 if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {

2327 SuccLoopDepth = ExitLoop->getLoopDepth();

2328 if (ExitLoop->contains(&L))

2329 BlocksExitingToOuterLoop.insert(MBB);

2330 }

2331

2332 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;

2335 << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("

2336 << printBlockFreq(MBFI->getMBFI(), ExitEdgeFreq) << ")\n");

2337

2338

2339

2340

2341 BranchProbability Bias(100 - ExitBlockBias, 100);

2342 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||

2343 ExitEdgeFreq > BestExitEdgeFreq ||

2345 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {

2346 BestExitEdgeFreq = ExitEdgeFreq;

2347 ExitingBB = MBB;

2348 }

2349 }

2350

2351 if (!HasLoopingSucc) {

2352

2353 ExitingBB = OldExitingBB;

2354 BestExitEdgeFreq = OldBestExitEdgeFreq;

2355 }

2356 }

2357

2358

2359 if (!ExitingBB) {

2361 dbgs() << " No other candidate exit blocks, using loop header\n");

2362 return nullptr;

2363 }

2364 if (L.getNumBlocks() == 1) {

2365 LLVM_DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");

2366 return nullptr;

2367 }

2368

2369

2370

2371

2372 if (!BlocksExitingToOuterLoop.empty() &&

2373 !BlocksExitingToOuterLoop.count(ExitingBB))

2374 return nullptr;

2375

2377 << "\n");

2378 ExitFreq = BestExitEdgeFreq;

2379 return ExitingBB;

2380}

2381

2382

2383

2384

2385

2386bool MachineBlockPlacement::hasViableTopFallthrough(

2387 const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) {

2388 for (MachineBasicBlock *Pred : Top->predecessors()) {

2389 BlockChain *PredChain = BlockToChain[Pred];

2390 if (!LoopBlockSet.count(Pred) &&

2391 (!PredChain || Pred == *std::prev(PredChain->end()))) {

2392

2393

2395 bool TopOK = true;

2396 for (MachineBasicBlock *Succ : Pred->successors()) {

2398 BlockChain *SuccChain = BlockToChain[Succ];

2399

2400

2401 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {

2402 TopOK = false;

2403 break;

2404 }

2405 }

2406 if (TopOK)

2407 return true;

2408 }

2409 }

2410 return false;

2411}

2412

2413

2414

2415

2416

2417

2418

2419void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,

2420 const MachineBasicBlock *ExitingBB,

2421 BlockFrequency ExitFreq,

2422 const BlockFilterSet &LoopBlockSet) {

2423 if (!ExitingBB)

2424 return;

2425

2426 MachineBasicBlock *Top = *LoopChain.begin();

2427 MachineBasicBlock *Bottom = *std::prev(LoopChain.end());

2428

2429

2430 if (Bottom == ExitingBB)

2431 return;

2432

2433

2435 return;

2436

2437 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);

2438

2439

2440

2441

2442 if (ViableTopFallthrough) {

2443 for (MachineBasicBlock *Succ : Bottom->successors()) {

2444 BlockChain *SuccChain = BlockToChain[Succ];

2445 if (!LoopBlockSet.count(Succ) &&

2446 (!SuccChain || Succ == *SuccChain->begin()))

2447 return;

2448 }

2449

2450

2451

2452 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);

2453 if (FallThrough2Top >= ExitFreq)

2454 return;

2455 }

2456

2457 BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);

2458 if (ExitIt == LoopChain.end())

2459 return;

2460

2461

2462

2463

2464

2465

2466

2467

2468

2469

2470

2471

2472

2473

2474

2475

2476

2477

2478

2479

2480 if (ViableTopFallthrough) {

2481 assert(std::next(ExitIt) != LoopChain.end() &&

2482 "Exit should not be last BB");

2483 MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);

2484 if (ExitingBB->isSuccessor(NextBlockInChain))

2486 return;

2487 }

2488

2490 << " at bottom\n");

2491 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());

2492}

2493

2494

2495

2496

2497

2498

2499

2500

2501

2502

2503

2504

2505

2506

2507void MachineBlockPlacement::rotateLoopWithProfile(

2508 BlockChain &LoopChain, const MachineLoop &L,

2509 const BlockFilterSet &LoopBlockSet) {

2510 auto RotationPos = LoopChain.end();

2511 MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();

2512

2513

2515 return;

2516

2518

2519

2520

2521 auto ScaleBlockFrequency = [](BlockFrequency Freq,

2522 unsigned Scale) -> BlockFrequency {

2523 if (Scale == 0)

2524 return BlockFrequency(0);

2525

2526

2527 return Freq / BranchProbability(1, Scale);

2528 };

2529

2530

2531

2532

2533 BlockFrequency HeaderFallThroughCost(0);

2534 for (auto *Pred : ChainHeaderBB->predecessors()) {

2535 BlockChain *PredChain = BlockToChain[Pred];

2536 if (!LoopBlockSet.count(Pred) &&

2537 (!PredChain || Pred == *std::prev(PredChain->end()))) {

2538 auto EdgeFreq = MBFI->getBlockFreq(Pred) *

2540 auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);

2541

2542

2544 FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);

2545 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);

2546 }

2547 }

2548

2549

2550

2551

2552

2554 for (auto *BB : LoopChain) {

2556 for (auto *Succ : BB->successors()) {

2557 BlockChain *SuccChain = BlockToChain[Succ];

2558 if (!LoopBlockSet.count(Succ) &&

2559 (!SuccChain || Succ == *SuccChain->begin())) {

2561 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);

2562 }

2563 }

2565 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;

2567 }

2568 }

2569

2570

2571

2572

2573 for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),

2574 EndIter = LoopChain.end();

2575 Iter != EndIter; Iter++, TailIter++) {

2576

2577

2578 if (TailIter == LoopChain.end())

2579 TailIter = LoopChain.begin();

2580

2581 auto TailBB = *TailIter;

2582

2583

2584 BlockFrequency Cost = BlockFrequency(0);

2585

2586

2587

2588

2589 if (Iter != LoopChain.begin())

2590 Cost += HeaderFallThroughCost;

2591

2592

2593

2594 for (auto &ExitWithFreq : ExitsWithFreq)

2595 if (TailBB != ExitWithFreq.first)

2596 Cost += ExitWithFreq.second;

2597

2598

2599

2600

2601

2602

2603

2604

2605

2606

2607

2608

2609

2610

2611

2612 if (TailBB->isSuccessor(*Iter)) {

2613 auto TailBBFreq = MBFI->getBlockFreq(TailBB);

2614 if (TailBB->succ_size() == 1)

2616 else if (TailBB->succ_size() == 2) {

2618 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;

2619 auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)

2620 ? TailBBFreq * TailToHeadProb.getCompl()

2621 : TailToHeadFreq;

2623 ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);

2624 }

2625 }

2626

2627 LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "

2630

2631 if (Cost < SmallestRotationCost) {

2632 SmallestRotationCost = Cost;

2633 RotationPos = Iter;

2634 }

2635 }

2636

2637 if (RotationPos != LoopChain.end()) {

2639 << " to the top\n");

2640 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());

2641 }

2642}

2643

2644

2645

2646

2647

2648MachineBlockPlacement::BlockFilterSet

2649MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {

2650

2651

2652 struct MBBCompare {

2653 bool operator()(const MachineBasicBlock *X,

2654 const MachineBasicBlock *Y) const {

2655 return X->getNumber() < Y->getNumber();

2656 }

2657 };

2658 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;

2659

2660

2661

2662

2663

2664

2665

2666

2667

2668

2670 BlockFrequency LoopFreq(0);

2671 for (auto *LoopPred : L.getHeader()->predecessors())

2672 if (L.contains(LoopPred))

2673 LoopFreq += MBFI->getBlockFreq(LoopPred) *

2675

2676 for (MachineBasicBlock *LoopBB : L.getBlocks()) {

2677 if (LoopBlockSet.count(LoopBB))

2678 continue;

2679 auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();

2681 continue;

2682 BlockChain *Chain = BlockToChain[LoopBB];

2683 for (MachineBasicBlock *ChainBB : *Chain)

2684 LoopBlockSet.insert(ChainBB);

2685 }

2686 } else

2687 LoopBlockSet.insert(L.block_begin(), L.block_end());

2688

2689

2690

2691

2692 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());

2693 return Ret;

2694}

2695

2696

2697

2698

2699

2700

2701

2702void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {

2703

2704

2705 for (const MachineLoop *InnerLoop : L)

2706 buildLoopChains(*InnerLoop);

2707

2709 "BlockWorkList not empty when starting to build loop chains.");

2711 "EHPadWorkList not empty when starting to build loop chains.");

2712 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);

2713

2714

2715

2716

2717 bool RotateLoopWithProfile =

2720

2721

2722

2723

2724

2725 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);

2726

2727

2728

2729

2730

2731

2732

2733 PreferredLoopExit = nullptr;

2734 BlockFrequency ExitFreq;

2735 if (!RotateLoopWithProfile && LoopTop == L.getHeader())

2736 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);

2737

2738 BlockChain &LoopChain = *BlockToChain[LoopTop];

2739

2740

2741

2742

2743 SmallPtrSet<BlockChain *, 4> UpdatedPreds;

2744 assert(LoopChain.UnscheduledPredecessors == 0 &&

2745 "LoopChain should not have unscheduled predecessors.");

2746 UpdatedPreds.insert(&LoopChain);

2747

2748 for (const MachineBasicBlock *LoopBB : LoopBlockSet)

2749 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);

2750

2751 buildChain(LoopTop, LoopChain, &LoopBlockSet);

2752

2753 if (RotateLoopWithProfile)

2754 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);

2755 else

2756 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);

2757

2759

2760 bool BadLoop = false;

2761 if (LoopChain.UnscheduledPredecessors) {

2762 BadLoop = true;

2763 dbgs() << "Loop chain contains a block without its preds placed!\n"

2764 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"

2765 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";

2766 }

2767 for (MachineBasicBlock *ChainBB : LoopChain) {

2769 if (!LoopBlockSet.remove(ChainBB)) {

2770

2771

2772

2773 dbgs() << "Loop chain contains a block not contained by the loop!\n"

2774 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"

2775 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"

2776 << " Bad block: " << getBlockName(ChainBB) << "\n";

2777 }

2778 }

2779

2780 if (!LoopBlockSet.empty()) {

2781 BadLoop = true;

2782 for (const MachineBasicBlock *LoopBB : LoopBlockSet)

2783 dbgs() << "Loop contains blocks never placed into a chain!\n"

2784 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"

2785 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"

2786 << " Bad block: " << getBlockName(LoopBB) << "\n";

2787 }

2788 assert(!BadLoop && "Detected problems with the placement of this loop.");

2789 });

2790

2791 BlockWorkList.clear();

2792 EHPadWorkList.clear();

2793}

2794

2795void MachineBlockPlacement::buildCFGChains() {

2796

2797

2800 ++FI) {

2801 MachineBasicBlock *BB = &*FI;

2802 BlockChain *Chain =

2803 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);

2804

2805

2806 while (true) {

2807 Cond.clear();

2808 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

2810 break;

2811

2813 MachineBasicBlock *NextBB = &*NextFI;

2814

2815

2816 assert(NextFI != FE && "Can't fallthrough past the last block.");

2817 LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "

2819 << "\n");

2820 Chain->merge(NextBB, nullptr);

2821#ifndef NDEBUG

2822 BlocksWithUnanalyzableExits.insert(&*BB);

2823#endif

2824 FI = NextFI;

2825 BB = NextBB;

2826 }

2827 }

2828

2829

2830 PreferredLoopExit = nullptr;

2831 for (MachineLoop *L : *MLI)

2832 buildLoopChains(*L);

2833

2835 "BlockWorkList should be empty before building final chain.");

2837 "EHPadWorkList should be empty before building final chain.");

2838

2839 SmallPtrSet<BlockChain *, 4> UpdatedPreds;

2840 for (MachineBasicBlock &MBB : *F)

2841 fillWorkLists(&MBB, UpdatedPreds);

2842

2843 BlockChain &FunctionChain = *BlockToChain[&F->front()];

2844 buildChain(&F->front(), FunctionChain);

2845

2846#ifndef NDEBUG

2847 using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;

2848#endif

2850

2851 bool BadFunc = false;

2852 FunctionBlockSetType FunctionBlockSet;

2853 for (MachineBasicBlock &MBB : *F)

2854 FunctionBlockSet.insert(&MBB);

2855

2856 for (MachineBasicBlock *ChainBB : FunctionChain)

2857 if (!FunctionBlockSet.erase(ChainBB)) {

2858 BadFunc = true;

2859 dbgs() << "Function chain contains a block not in the function!\n"

2860 << " Bad block: " << getBlockName(ChainBB) << "\n";

2861 }

2862

2863 if (!FunctionBlockSet.empty()) {

2864 BadFunc = true;

2865 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)

2866 dbgs() << "Function contains blocks never placed into a chain!\n"

2867 << " Bad block: " << getBlockName(RemainingBB) << "\n";

2868 }

2869 assert(!BadFunc && "Detected problems with the block placement.");

2870 });

2871

2872

2873

2874 SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(

2875 F->getNumBlockIDs());

2876 {

2877 MachineBasicBlock *LastMBB = nullptr;

2878 for (auto &MBB : *F) {

2879 if (LastMBB != nullptr)

2880 OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB;

2881 LastMBB = &MBB;

2882 }

2883 OriginalLayoutSuccessors[F->back().getNumber()] = nullptr;

2884 }

2885

2886

2888 LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");

2889 for (MachineBasicBlock *ChainBB : FunctionChain) {

2890 LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "

2891 : " ... ")

2894 F->splice(InsertPos, ChainBB);

2895 else

2896 ++InsertPos;

2897

2898

2899 if (ChainBB == *FunctionChain.begin())

2900 continue;

2902

2903

2904

2905

2906 Cond.clear();

2907 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

2908

2909#ifndef NDEBUG

2910 if (!BlocksWithUnanalyzableExits.count(PrevBB)) {

2911

2912

2913

2916 "Unexpected block with un-analyzable fallthrough!");

2917 Cond.clear();

2918 TBB = FBB = nullptr;

2919 }

2920#endif

2921

2922

2923

2924

2925

2926

2927

2928

2929

2930

2931

2932

2933

2934

2935

2936

2937

2938

2939

2940

2941

2942

2945 }

2946 }

2947

2948

2949 Cond.clear();

2950 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

2952 MachineBasicBlock *PrevBB = &F->back();

2954 }

2955

2956 BlockWorkList.clear();

2957 EHPadWorkList.clear();

2958}

2959

2960void MachineBlockPlacement::optimizeBranches() {

2961 BlockChain &FunctionChain = *BlockToChain[&F->front()];

2963

2964

2965

2966

2967

2968

2969

2970 for (MachineBasicBlock *ChainBB : FunctionChain) {

2971 Cond.clear();

2972 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

2974 continue;

2976 continue;

2977

2978

2979

2981 continue;

2982

2983

2986 continue;

2988 continue;

2989 LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "

2992 << "\n");

2993 auto Dl = ChainBB->findBranchDebugLoc();

2996 }

2997}

2998

2999void MachineBlockPlacement::alignBlocks() {

3000

3001

3002

3003

3004

3006 if (F->getFunction().hasMinSize() ||

3008 return;

3009 }

3010

3011 BlockChain &FunctionChain = *BlockToChain[&F->front()];

3012

3013 if (FunctionChain.begin() == FunctionChain.end())

3014 return;

3015

3016 const BranchProbability ColdProb(1, 5);

3017 BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());

3018 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;

3019 for (MachineBasicBlock *ChainBB : FunctionChain) {

3020 if (ChainBB == *FunctionChain.begin())

3021 continue;

3022

3023

3024

3025

3026

3027 MachineLoop *L = MLI->getLoopFor(ChainBB);

3028 if (!L)

3029 continue;

3030

3032 unsigned MDAlign = 1;

3033 MDNode *LoopID = L->getLoopID();

3034 if (LoopID) {

3037 if (MD == nullptr)

3038 continue;

3040 if (S == nullptr)

3041 continue;

3042 if (S->getString() == "llvm.loop.align") {

3044 "per-loop align metadata should have two operands.");

3045 MDAlign =

3047 assert(MDAlign >= 1 && "per-loop align value must be positive.");

3048 }

3049 }

3050 }

3051

3052

3053 const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));

3054 if (LoopAlign == 1)

3055 continue;

3056

3057

3058

3059 BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);

3060 if (Freq < WeightedEntryFreq)

3061 continue;

3062

3063

3064

3065 MachineBasicBlock *LoopHeader = L->getHeader();

3066 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);

3067 if (Freq < (LoopHeaderFreq * ColdProb))

3068 continue;

3069

3070

3073 continue;

3074

3075

3076

3077 MachineBasicBlock *LayoutPred =

3079

3080 auto DetermineMaxAlignmentPadding = [&]() {

3081

3082 unsigned MaxBytes;

3085 else

3087 ChainBB->setMaxBytesForAlignment(MaxBytes);

3088 };

3089

3090

3091

3092 if (!LayoutPred->isSuccessor(ChainBB)) {

3093 ChainBB->setAlignment(LoopAlign);

3094 DetermineMaxAlignmentPadding();

3095 continue;

3096 }

3097

3098

3099

3100

3101

3102 BranchProbability LayoutProb =

3104 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;

3105 if (LayoutEdgeFreq <= (Freq * ColdProb)) {

3106 ChainBB->setAlignment(LoopAlign);

3107 DetermineMaxAlignmentPadding();

3108 }

3109 }

3110

3111 const bool HasMaxBytesOverride =

3113

3115

3116 for (MachineBasicBlock &MBB : *F) {

3117 if (HasMaxBytesOverride)

3120 else

3122 }

3124

3125

3126 for (auto MBI = std::next(F->begin()), MBE = F->end(); MBI != MBE; ++MBI) {

3127 auto LayoutPred = std::prev(MBI);

3129 if (HasMaxBytesOverride)

3132 else

3134 }

3135 }

3136 }

3137}

3138

3139

3140

3141

3142

3143

3144

3145

3146

3147

3148

3149

3150

3151

3152

3153

3154bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(

3155 MachineBasicBlock *BB, MachineBasicBlock *&LPred,

3156 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,

3158 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {

3159 bool Removed, DuplicatedToLPred;

3160 bool DuplicatedToOriginalLPred;

3161 Removed = maybeTailDuplicateBlock(

3162 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,

3163 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);

3164 if (!Removed)

3165 return false;

3166 DuplicatedToOriginalLPred = DuplicatedToLPred;

3167

3168

3169

3170

3171 while (DuplicatedToLPred && Removed) {

3172 MachineBasicBlock *DupBB, *DupPred;

3173

3174

3175

3176

3177

3178 BlockChain::iterator ChainEnd = Chain.end();

3179 DupBB = *(--ChainEnd);

3180

3181 if (ChainEnd == Chain.begin())

3182 break;

3183 DupPred = *std::prev(ChainEnd);

3184 Removed = maybeTailDuplicateBlock(

3185 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,

3186 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);

3187 }

3188

3189

3190

3191

3192

3193 LPred = *std::prev(Chain.end());

3194 if (DuplicatedToOriginalLPred)

3195 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);

3196 return true;

3197}

3198

3199

3200

3201

3202

3203

3204

3205

3206

3207

3208

3209

3210

3211

3212bool MachineBlockPlacement::maybeTailDuplicateBlock(

3213 MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,

3215 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,

3216 bool &DuplicatedToLPred) {

3217 DuplicatedToLPred = false;

3218 if (!shouldTailDuplicate(BB))

3219 return false;

3220

3222 << "\n");

3223

3224

3225

3226 bool Removed = false;

3227 auto RemovalCallback = [&](MachineBasicBlock *RemBB) {

3228

3229 Removed = true;

3230

3231

3232 if (auto It = BlockToChain.find(RemBB); It != BlockToChain.end()) {

3233 It->second->remove(RemBB);

3234 BlockToChain.erase(It);

3235 }

3236

3237

3238 if (&(*PrevUnplacedBlockIt) == RemBB) {

3239 PrevUnplacedBlockIt++;

3240 }

3241

3242

3243 if (RemBB->isEHPad()) {

3245 } else {

3247 }

3248

3249

3250 if (BlockFilter) {

3251 auto It = llvm::find(*BlockFilter, RemBB);

3252

3253

3254 if (It != BlockFilter->end()) {

3255 if (It < PrevUnplacedBlockInFilterIt) {

3256 const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;

3257

3258

3259 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;

3260 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance;

3261 assert(*PrevUnplacedBlockInFilterIt == PrevBB);

3262 (void)PrevBB;

3263 } else if (It == PrevUnplacedBlockInFilterIt)

3264

3265

3266 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);

3267 else

3268 BlockFilter->erase(It);

3269 }

3270 }

3271

3272

3273 MLI->removeBlock(RemBB);

3274 if (RemBB == PreferredLoopExit)

3275 PreferredLoopExit = nullptr;

3276

3278 << "\n");

3279 };

3280 auto RemovalCallbackRef =

3281 function_ref<void(MachineBasicBlock *)>(RemovalCallback);

3282

3283 SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;

3284 bool IsSimple = TailDup.isSimpleBB(BB);

3285 SmallVector<MachineBasicBlock *, 8> CandidatePreds;

3286 SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr;

3287 if (F->getFunction().hasProfileData()) {

3288

3289 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);

3290 if (CandidatePreds.size() == 0)

3291 return false;

3293 CandidatePtr = &CandidatePreds;

3294 }

3296 &RemovalCallbackRef, CandidatePtr);

3297

3298

3299 DuplicatedToLPred = false;

3300 for (MachineBasicBlock *Pred : DuplicatedPreds) {

3301

3302 BlockChain *PredChain = BlockToChain[Pred];

3303 if (Pred == LPred)

3304 DuplicatedToLPred = true;

3305 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||

3306 PredChain == &Chain)

3307 continue;

3308 for (MachineBasicBlock *NewSucc : Pred->successors()) {

3309 if (BlockFilter && !BlockFilter->count(NewSucc))

3310 continue;

3311 BlockChain *NewChain = BlockToChain[NewSucc];

3312 if (NewChain != &Chain && NewChain != PredChain)

3313 NewChain->UnscheduledPredecessors++;

3314 }

3315 }

3316 return Removed;

3317}

3318

3319

3323 if (MI.isPHI() && MI.isMetaInstruction())

3325 }

3327}

3328

3329

3330

3331

3332BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {

3334}

3335

3336

3337bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,

3338 MachineBasicBlock *Pred,

3339 BlockFilterSet *BlockFilter) {

3340 if (BB == Pred)

3341 return false;

3342 if (BlockFilter && !BlockFilter->count(Pred))

3343 return false;

3344 BlockChain *PredChain = BlockToChain[Pred];

3345 if (PredChain && (Pred != *std::prev(PredChain->end())))

3346 return false;

3347

3348

3350 for (MachineBasicBlock *Succ : Pred->successors())

3351 if (Succ != BB) {

3352 if (BlockFilter && !BlockFilter->count(Succ))

3353 continue;

3354 BlockChain *SuccChain = BlockToChain[Succ];

3355 if (SuccChain && (Succ != *SuccChain->begin()))

3356 continue;

3358 if (SuccProb > BestProb)

3359 BestProb = SuccProb;

3360 }

3361

3363 if (BBProb <= BestProb)

3364 return false;

3365

3366

3367

3368 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);

3369 BlockFrequency Gain = PredFreq * (BBProb - BestProb);

3370 return Gain > scaleThreshold(BB);

3371}

3372

3373

3374

3375void MachineBlockPlacement::findDuplicateCandidates(

3376 SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,

3377 BlockFilterSet *BlockFilter) {

3378 MachineBasicBlock *Fallthrough = nullptr;

3380 BlockFrequency BBDupThreshold(scaleThreshold(BB));

3381 SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors());

3382 SmallVector<MachineBasicBlock *, 8> Succs(BB->successors());

3383

3384

3385 auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {

3387 };

3388 auto CmpPred = [&](MachineBasicBlock *A, MachineBasicBlock *B) {

3389 return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B);

3390 };

3393

3394 auto SuccIt = Succs.begin();

3395 if (SuccIt != Succs.end()) {

3397 }

3398

3399

3400

3401

3402

3403

3404

3405

3406

3407

3408

3409

3410

3411

3412

3413

3414

3415

3416

3417

3418

3419

3420

3421

3422

3423

3424

3425

3426

3427

3428

3429

3430

3431

3432

3433

3434

3435

3436

3437

3438

3439

3440 for (MachineBasicBlock *Pred : Preds) {

3441 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);

3442

3444

3445

3446 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {

3447 Fallthrough = Pred;

3448 if (SuccIt != Succs.end())

3449 SuccIt++;

3450 }

3451 continue;

3452 }

3453

3454 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;

3455 BlockFrequency DupCost;

3456 if (SuccIt == Succs.end()) {

3457

3458 if (Succs.size() > 0)

3459 DupCost += PredFreq;

3460 } else {

3461

3462 DupCost += PredFreq;

3464 }

3465

3466 assert(OrigCost >= DupCost);

3467 OrigCost -= DupCost;

3468 if (OrigCost > BBDupThreshold) {

3470 if (SuccIt != Succs.end())

3471 SuccIt++;

3472 }

3473 }

3474

3475

3476

3477 if (!Fallthrough) {

3478 if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {

3479 Candidates[0] = Candidates.back();

3481 }

3482 }

3483}

3484

3485void MachineBlockPlacement::initTailDupThreshold() {

3486 DupThreshold = BlockFrequency(0);

3487 if (F->getFunction().hasProfileData()) {

3488

3491 UseProfileCount = true;

3492 DupThreshold =

3494 } else {

3495

3496 BlockFrequency MaxFreq = BlockFrequency(0);

3497 for (MachineBasicBlock &MBB : *F) {

3498 BlockFrequency Freq = MBFI->getBlockFreq(&MBB);

3499 if (Freq > MaxFreq)

3500 MaxFreq = Freq;

3501 }

3502

3504 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);

3505 UseProfileCount = false;

3506 }

3507 }

3508

3510

3514

3515

3516

3517 if (OptLevel >= CodeGenOptLevel::Aggressive) {

3518

3519

3520

3524 }

3525

3526

3527

3529 (OptLevel < CodeGenOptLevel::Aggressive ||

3532}

3533

3534PreservedAnalyses

3538 auto MBFI = std::make_unique(

3541 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)

3543 : nullptr;

3545 .getCachedResult(

3547 if (!PSI)

3548 report_fatal_error("MachineBlockPlacement requires ProfileSummaryAnalysis",

3549 false);

3550

3551 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,

3552 AllowTailMerge);

3553

3554 if (!MBP.run(MF))

3556

3558}

3559

3563 OS << MapClassName2PassName(name());

3564 if (!AllowTailMerge)

3565 OS << "";

3566}

3567

3569

3570

3571 if (std::next(MF.begin()) == MF.end())

3572 return false;

3573

3574 F = &MF;

3575 OptLevel = F->getTarget().getOptLevel();

3576

3579

3580

3581

3582 PreferredLoopExit = nullptr;

3583

3585 "BlockToChain map should be empty before starting placement.");

3587 "Computed Edge map should be empty before starting placement.");

3588

3589

3590 initTailDupThreshold();

3591

3592 const bool OptForSize =

3594

3595

3596

3597 bool UseExtTspForPerf = false;

3598 bool UseExtTspForSize = false;

3600 UseExtTspForPerf =

3604 }

3605

3606

3607 if (allowTailDupPlacement(*F)) {

3608 if (OptForSize)

3609 TailDupSize = 1;

3610 const bool PreRegAlloc = false;

3611 TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,

3612 true, TailDupSize);

3613 if (!UseExtTspForSize)

3614 precomputeTriangleChains();

3615 }

3616

3617

3618 if (!UseExtTspForSize)

3619 buildCFGChains();

3620

3621

3622

3623

3626 MF.size() > 3;

3627

3628 if (EnableTailMerge) {

3630 BranchFolder BF(true, false,

3632

3634 true)) {

3635

3636 if (MPDT)

3638 if (!UseExtTspForSize) {

3639

3640 BlockToChain.clear();

3641 ComputedEdges.clear();

3643 buildCFGChains();

3644 }

3645 }

3646 }

3647

3648

3649

3650

3651 if (UseExtTspForPerf || UseExtTspForSize) {

3653 !(UseExtTspForPerf && UseExtTspForSize) &&

3654 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");

3655 applyExtTsp(UseExtTspForSize);

3656 createCFGChainExtTsp();

3657 }

3658

3659 optimizeBranches();

3660 alignBlocks();

3661

3662 BlockToChain.clear();

3663 ComputedEdges.clear();

3665

3666

3672 MBFI->view("MBP." + MF.getName(), false);

3673 }

3674

3675

3676

3677 return true;

3678}

3679

3680void MachineBlockPlacement::applyExtTsp(bool OptForSize) {

3681

3682 DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;

3683 BlockIndex.reserve(F->size());

3684 std::vector<const MachineBasicBlock *> CurrentBlockOrder;

3685 CurrentBlockOrder.reserve(F->size());

3686 size_t NumBlocks = 0;

3687 for (const MachineBasicBlock &MBB : *F) {

3688 BlockIndex[&MBB] = NumBlocks++;

3689 CurrentBlockOrder.push_back(&MBB);

3690 }

3691

3692 SmallVector<uint64_t, 0> BlockCounts(F->size());

3693 SmallVector<uint64_t, 0> BlockSizes(F->size());

3697 for (MachineBasicBlock &MBB : *F) {

3698

3699 BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);

3700 BlockCounts[BlockIndex[&MBB]] = OptForSize ? 1 : BlockFreq.getFrequency();

3701

3702

3703

3704

3705

3706

3707 auto NonDbgInsts =

3709 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());

3710 BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;

3711

3712

3713 if (OptForSize) {

3714 Cond.clear();

3715 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

3717 continue;

3718

3720

3721

3722

3724 if (TBB && TBB != FTB)

3726 if (FBB && FBB != FTB)

3728 if (FTB)

3730

3731

3732

3733 const uint64_t Freq = Succs.size() == 1 ? 110 : 100;

3734 for (const MachineBasicBlock *Succ : Succs)

3735 JumpCounts.push_back({BlockIndex[&MBB], BlockIndex[Succ], Freq});

3736 } else {

3737 for (MachineBasicBlock *Succ : MBB.successors()) {

3739 BlockFrequency JumpFreq = BlockFreq * EP;

3741 {BlockIndex[&MBB], BlockIndex[Succ], JumpFreq.getFrequency()});

3742 }

3743 }

3744 }

3745

3746 LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()

3747 << " with profile = " << F->getFunction().hasProfileData()

3748 << " (" << F->getName() << ")" << "\n");

3749

3750 const double OrgScore = calcExtTspScore(BlockSizes, JumpCounts);

3752

3753

3754 auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);

3755 std::vector<const MachineBasicBlock *> NewBlockOrder;

3756 NewBlockOrder.reserve(F->size());

3757 for (uint64_t Node : NewOrder) {

3758 NewBlockOrder.push_back(CurrentBlockOrder[Node]);

3759 }

3760 const double OptScore = calcExtTspScore(NewOrder, BlockSizes, JumpCounts);

3762

3763

3764 if (OptForSize && OrgScore > OptScore)

3765 assignBlockOrder(CurrentBlockOrder);

3766 else

3767 assignBlockOrder(NewBlockOrder);

3768}

3769

3770void MachineBlockPlacement::assignBlockOrder(

3771 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {

3772 assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");

3773 F->RenumberBlocks();

3774

3775

3776

3777

3778 MPDT = nullptr;

3779

3780 bool HasChanges = false;

3781 for (size_t I = 0; I < NewBlockOrder.size(); I++) {

3782 if (NewBlockOrder[I] != F->getBlockNumbered(I)) {

3783 HasChanges = true;

3784 break;

3785 }

3786 }

3787

3788 if (!HasChanges)

3789 return;

3790

3791 SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(F->getNumBlockIDs());

3792 for (auto &MBB : *F) {

3794 }

3795

3796

3797 DenseMap<const MachineBasicBlock *, size_t> NewIndex;

3798 for (const MachineBasicBlock *MBB : NewBlockOrder) {

3799 NewIndex[MBB] = NewIndex.size();

3800 }

3801 F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {

3802 return NewIndex[&L] < NewIndex[&R];

3803 });

3804

3805

3806

3807 const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();

3809 for (auto &MBB : *F) {

3812 auto *FTMBB = PrevFallThroughs[MBB.getNumber()];

3813

3814

3815

3816 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {

3818 }

3819

3820

3821 Cond.clear();

3822 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

3824 continue;

3826 }

3827}

3828

3829void MachineBlockPlacement::createCFGChainExtTsp() {

3830 BlockToChain.clear();

3831 ComputedEdges.clear();

3833

3834 MachineBasicBlock *HeadBB = &F->front();

3835 BlockChain *FunctionChain =

3836 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);

3837

3838 for (MachineBasicBlock &MBB : *F) {

3839 if (HeadBB == &MBB)

3840 continue;

3841 FunctionChain->merge(&MBB, nullptr);

3842 }

3843}

3844

3845namespace {

3846

3847

3848

3849

3850

3851

3852

3853class MachineBlockPlacementStats {

3854

3855 const MachineBranchProbabilityInfo *MBPI;

3856

3857

3858 const MachineBlockFrequencyInfo *MBFI;

3859

3860public:

3861 MachineBlockPlacementStats(const MachineBranchProbabilityInfo *MBPI,

3862 const MachineBlockFrequencyInfo *MBFI)

3863 : MBPI(MBPI), MBFI(MBFI) {}

3864 bool run(MachineFunction &MF);

3865};

3866

3867class MachineBlockPlacementStatsLegacy : public MachineFunctionPass {

3868public:

3869 static char ID;

3870

3871 MachineBlockPlacementStatsLegacy() : MachineFunctionPass(ID) {

3874 }

3875

3876 bool runOnMachineFunction(MachineFunction &F) override {

3877 auto *MBPI =

3878 &getAnalysis().getMBPI();

3879 auto *MBFI = &getAnalysis().getMBFI();

3880 return MachineBlockPlacementStats(MBPI, MBFI).run(F);

3881 }

3882

3883 void getAnalysisUsage(AnalysisUsage &AU) const override {

3884 AU.addRequired();

3885 AU.addRequired();

3888 }

3889};

3890

3891}

3892

3893char MachineBlockPlacementStatsLegacy::ID = 0;

3894

3896

3898 "Basic Block Placement Stats", false, false)

3903

3909

3910 MachineBlockPlacementStats(&MBPI, &MBFI).run(MF);

3912}

3913

3915

3916 if (std::next(F.begin()) == F.end())

3917 return false;

3918

3920 return false;

3921

3925 (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;

3927 (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;

3929

3930 if (MBB.isLayoutSuccessor(Succ))

3931 continue;

3932

3935 ++NumBranches;

3937 }

3938 }

3939

3940 return false;

3941}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

This file defines the BumpPtrAllocator interface.

static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Declares methods and data structures for code layout algorithms.

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

static unsigned InstrCount

This file defines the DenseMap class.

static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)

static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)

The helper function returns the branch probability that is adjusted or normalized over the new total ...

Definition MachineBlockPlacement.cpp:810

static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)

static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)

static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)

static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)

static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)

Definition MachineBlockPlacement.cpp:1445

static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)

static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)

static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)

static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)

static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)

static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)

static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)

Compare 2 BlockFrequency's with a small penalty for A.

Definition MachineBlockPlacement.cpp:858

static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)

static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)

static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)

static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)

static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)

Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)

Helper to print the name of a MBB.

Definition MachineBlockPlacement.cpp:698

static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))

static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)

Check if BB has exactly the successors in Successors.

Definition MachineBlockPlacement.cpp:825

static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)

static uint64_t countMBBInstruction(MachineBasicBlock *MBB)

Definition MachineBlockPlacement.cpp:3320

static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)

static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)

static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)

static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)

static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB

const SmallVectorImpl< MachineOperand > & Cond

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

This file describes how to lower LLVM code to machine code.

Target-Independent Code Generator Pass Configuration Options pass.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

static BlockFrequency max()

Returns the maximum possible frequency, the saturation value.

uint64_t getFrequency() const

Returns the frequency as a fixpoint number scaled by the entry frequency.

static BranchProbability getOne()

uint32_t getNumerator() const

BranchProbability getCompl() const

static BranchProbability getZero()

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool erase(const KeyT &Val)

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

void reserve(size_type NumEntries)

Grow the densemap so that it can contain at least NumEntries items before resizing again.

bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const

dominates - Returns true iff A dominates B.

void recalculate(ParentType &Func)

recalculate - compute a dominator tree for the given function

bool hasProfileData(bool IncludeSynthetic=false) const

Return true if the function is annotated with profile data.

Module * getParent()

Get the module that this global value is contained inside of...

LoopT * getLoopFor(const BlockT *BB) const

Return the inner most loop that BB lives in.

const MDOperand & getOperand(unsigned I) const

ArrayRef< MDOperand > operands() const

unsigned getNumOperands() const

Return number of MDNode operands.

LLVM_ABI StringRef getString() const

unsigned pred_size() const

bool isEHPad() const

Returns true if the block is a landing pad.

instr_iterator instr_begin()

LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)

Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)

Update the terminator instructions in block to account for changes to block layout which may have bee...

LLVM_ABI bool canFallThrough()

Return true if the block can implicitly transfer control to the block after it by falling off the end...

succ_iterator succ_begin()

unsigned succ_size() const

void setAlignment(Align A)

Set alignment of the basic block.

LLVM_ABI bool isEntryBlock() const

Returns true if this is the entry block of the function.

pred_iterator pred_begin()

succ_reverse_iterator succ_rbegin()

LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const

Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...

instr_iterator instr_end()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

LLVM_ABI instr_iterator erase(instr_iterator I)

Remove an instruction from the instruction list and delete it.

LLVM_ABI DebugLoc findBranchDebugLoc()

Find and return the merged DebugLoc of the branch instructions of the block.

iterator_range< succ_iterator > successors()

LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const

Return true if the specified MBB is a successor of this block.

iterator_range< pred_iterator > predecessors()

LLVM_ABI StringRef getName() const

Return the name of the corresponding LLVM basic block, or an empty string.

LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition MachineBlockPlacement.cpp:3535

void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const

Definition MachineBlockPlacement.cpp:3560

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition MachineBlockPlacement.cpp:3905

BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

Function & getFunction()

Return the LLVM function that this machine code represents.

BasicBlockListType::iterator iterator

void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)

RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

Representation of each machine instruction.

Analysis pass that exposes the MachineLoopInfo for a machine function.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.

LLVM_ABI uint64_t getOrCompHotCountThreshold() const

Returns HotCountThreshold if set.

typename vector_type::const_iterator iterator

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

reference emplace_back(ArgTypes &&... Args)

iterator erase(const_iterator CI)

typename SuperClass::const_iterator const_iterator

typename SuperClass::iterator iterator

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

T * Allocate(size_t num=1)

Allocate space for an array of objects without constructing them.

void DestroyAll()

Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...

StringRef - Represent a constant reference to a string, i.e.

void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)

Prepare to run on a specific machine function.

bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)

Tail duplicate a single basic block into its predecessors, and then clean up.

static bool isSimpleBB(MachineBasicBlock *TailBB)

True if this BB has only one unconditional jump.

bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)

Returns true if TailBB can successfully be duplicated into PredBB.

bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)

Determine if it is profitable to duplicate this block.

virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const

Reverses the branch condition of the specified condition list, returning false on success and true if...

virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const

Remove the branching code at the end of the specific MBB.

virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const

Insert branch code into the end of the specified MachineBasicBlock.

virtual unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const

Returns the target-specific default value for tail duplication.

unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const

virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const

Return the maximum amount of bytes allowed to be emitted when padding for alignment.

virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const

Return the preferred loop alignment.

virtual bool alignLoopsWithOptSize() const

Should loops be aligned even when the function is marked OptSize (but not MinSize).

bool requiresStructuredCFG() const

virtual const TargetInstrInfo * getInstrInfo() const

virtual const TargetRegisterInfo * getRegisterInfo() const =0

Return the target's register information.

virtual const TargetLowering * getTargetLowering() const

An efficient, type-erasing, non-owning reference to a callable.

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

A raw_ostream that writes to an std::string.

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

initializer< Ty > init(const Ty &Val)

LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)

Estimate the "quality" of a given node order in CFG.

LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)

Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")

Append to path.

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

LLVM_ABI void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &)

void stable_sort(R &&Range)

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy

Provide the ModuleAnalysisManager to Function proxy.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

cl::opt< bool > ApplyExtTspWithoutProfile

constexpr from_range_t from_range

LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)

Returns true if machine function MF is suggested to be size-optimized based on the profile.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()

Returns the minimum set of Analyses that all machine function passes must preserve.

cl::opt< unsigned > ProfileLikelyProb

void erase(Container &C, ValueType V)

Wrapper function to remove a value from a container:

cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))

auto reverse(ContainerTy &&C)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))

Definition MachineBlockPlacement.cpp:242

bool isFunctionInPrintList(StringRef FunctionName)

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

FunctionAddr VTableAddr Count

CodeGenOptLevel

Code generation optimization level.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)

Construct a range iterator which begins at It and moves forwards until End is reached,...

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

MutableArrayRef(T &OneElt) -> MutableArrayRef< T >

LLVM_ABI void initializeMachineBlockPlacementLegacyPass(PassRegistry &)

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

cl::opt< unsigned > StaticLikelyProb

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)

Print the block frequency Freq relative to the current functions entry frequency.

LLVM_ABI char & MachineBlockPlacementID

MachineBlockPlacement - This pass places basic blocks based on branch probabilities.

Definition MachineBlockPlacement.cpp:682

cl::opt< bool > EnableExtTspBlockPlacement

LLVM_ABI char & MachineBlockPlacementStatsID

MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...

Definition MachineBlockPlacement.cpp:3895

LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.