LLVM: lib/CodeGen/MachineBlockPlacement.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

66#include

67#include

68#include

69#include

70#include

71#include

72#include

73#include

74#include

75

76using namespace llvm;

77

78#define DEBUG_TYPE "block-placement"

79

80STATISTIC(NumCondBranches, "Number of conditional branches");

81STATISTIC(NumUncondBranches, "Number of unconditional branches");

83 "Potential frequency of taking conditional branches");

85 "Potential frequency of taking unconditional branches");

86

88 "align-all-blocks",

89 cl::desc("Force the alignment of all blocks in the function in log2 format "

90 "(e.g 4 means align on 16B boundaries)."),

92

94 "align-all-nofallthru-blocks",

95 cl::desc("Force the alignment of all blocks that have no fall-through "

96 "predecessors (i.e. don't add nops that are executed). In log2 "

97 "format (e.g 4 means align on 16B boundaries)."),

99

101 "max-bytes-for-alignment",

102 cl::desc("Forces the maximum bytes allowed to be emitted when padding for "

103 "alignment"),

105

106

108 "block-placement-exit-block-bias",

109 cl::desc("Block frequency percentage a loop exit block needs "

110 "over the original exit to be considered the new exit."),

112

113

114

115

117 "loop-to-cold-block-ratio",

118 cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "

119 "(frequency of block) is greater than this ratio"),

121

124 cl::desc("Force outlining cold blocks from loops."),

126

129 cl::desc("Model the cost of loop rotation more "

130 "precisely by using profile data."),

132

135 cl::desc("Force the use of precise cost "

136 "loop rotation strategy."),

138

140 "misfetch-cost",

141 cl::desc("Cost that models the probabilistic risk of an instruction "

142 "misfetch due to a jump comparing to falling through, whose cost "

143 "is zero."),

145

147 cl::desc("Cost of jump instructions."),

151 cl::desc("Perform tail duplication during placement. "

152 "Creates more fallthrough opportunities in "

153 "outline branches."),

155

158 cl::desc("Perform branch folding during placement. "

159 "Reduces code size."),

161

162

164 "tail-dup-placement-threshold",

165 cl::desc("Instruction cutoff for tail duplication during layout. "

166 "Tail merging during layout is forced to have a threshold "

167 "that won't conflict."),

169

170

172 "tail-dup-placement-aggressive-threshold",

173 cl::desc("Instruction cutoff for aggressive tail duplication during "

174 "layout. Used at -O3. Tail merging during layout is forced to "

175 "have a threshold that won't conflict."),

177

178

180 "tail-dup-placement-penalty",

182 "Cost penalty for blocks that can avoid breaking CFG by copying. "

183 "Copying can increase fallthrough, but it also increases icache "

184 "pressure. This parameter controls the penalty to account for that. "

185 "Percent as integer."),

187

188

190 "tail-dup-profile-percent-threshold",

191 cl::desc("If profile count information is used in tail duplication cost "

192 "model, the gained fall through number from tail duplication "

193 "should be at least this percent of hot count."),

195

196

198 "triangle-chain-count",

199 cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "

200 "triangle tail duplication heuristic to kick in. 0 to disable."),

202

203

204

205

206

207

209 "renumber-blocks-before-view",

211 "If true, basic blocks are re-numbered before MBP layout is printed "

212 "into a dot graph. Only used when a function is being printed."),

214

216 "ext-tsp-block-placement-max-blocks",

217 cl::desc("Maximum number of basic blocks in a function to run ext-TSP "

218 "block placement."),

220

221

224 cl::desc("Use ext-tsp for size-aware block placement."));

225

226namespace llvm {

231

232

233

234

236

237

238

240}

241

242namespace {

243

244class BlockChain;

245

246

248

249

250

251

252

253

254

255

256

257

258

259

260class BlockChain {

261

262

263

264

266

267

268

269

270

271

272

273 BlockToChainMapType &BlockToChain;

274

275public:

276

277

278

279

280

281 BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)

282 : Blocks(1, BB), BlockToChain(BlockToChain) {

283 assert(BB && "Cannot create a chain with a null basic block");

284 BlockToChain[BB] = this;

285 }

286

287

290

291

292 iterator begin() { return Blocks.begin(); }

294

295

296 iterator end() { return Blocks.end(); }

298

300 for (iterator i = begin(); i != end(); ++i) {

301 if (*i == BB) {

303 return true;

304 }

305 }

306 return false;

307 }

308

309

310

311

312

313

314

316 assert(BB && "Can't merge a null block.");

317 assert(Blocks.empty() && "Can't merge into an empty chain.");

318

319

320 if (!Chain) {

321 assert(!BlockToChain[BB] &&

322 "Passed chain is null, but BB has entry in BlockToChain.");

323 Blocks.push_back(BB);

324 BlockToChain[BB] = this;

325 return;

326 }

327

328 assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");

329 assert(Chain->begin() != Chain->end());

330

331

332

334 Blocks.push_back(ChainBB);

335 assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");

336 BlockToChain[ChainBB] = this;

337 }

338 }

339

340#ifndef NDEBUG

341

345 }

346#endif

347

348

349

350

351

352

353

354

355

356

357 unsigned UnscheduledPredecessors = 0;

358};

359

361

363

364

365 struct BlockAndTailDupResult {

367 bool ShouldTailDup;

368 };

369

370

371 struct WeightedEdge {

375 };

376

377

380

381

383

384

386

387

389

390

391 std::unique_ptr MBFI;

392

393

395

396

397

398

400

401

403

404

406

407

409

411

413

414

415

416

417

418

420

421

423

424 unsigned TailDupSize;

425

426

427

428 bool UseProfileCount = false;

429

430

431

432

433

434

435

436

438

439

440

441

442

443

444

446

447#ifndef NDEBUG

448

449

450

451

453#endif

454

455

456

458 if (UseProfileCount) {

459 auto Count = MBFI->getBlockProfileCount(BB);

460 if (Count)

462 else

464 } else

465 return MBFI->getBlockFreq(BB);

466 }

467

468

470 void initTailDupThreshold();

471

472

473

474 void markChainSuccessors(const BlockChain &Chain,

476 const BlockFilterSet *BlockFilter = nullptr);

477

478

479

480 void markBlockSuccessors(const BlockChain &Chain, const MachineBasicBlock *BB,

482 const BlockFilterSet *BlockFilter = nullptr);

483

485 collectViableSuccessors(const MachineBasicBlock *BB, const BlockChain &Chain,

486 const BlockFilterSet *BlockFilter,

489 BlockFilterSet *BlockFilter);

492 BlockFilterSet *BlockFilter);

493 bool repeatedlyTailDuplicateBlock(

496 BlockFilterSet *BlockFilter,

499 bool

501 BlockChain &Chain, BlockFilterSet *BlockFilter,

504 bool &DuplicatedToLPred);

507 const BlockChain &SuccChain,

510 const BlockChain &Chain,

511 const BlockFilterSet *BlockFilter);

512 BlockAndTailDupResult selectBestSuccessor(const MachineBasicBlock *BB,

513 const BlockChain &Chain,

514 const BlockFilterSet *BlockFilter);

516 selectBestCandidateBlock(const BlockChain &Chain,

519 getFirstUnplacedBlock(const BlockChain &PlacedChain,

522 getFirstUnplacedBlock(const BlockChain &PlacedChain,

524 const BlockFilterSet *BlockFilter);

525

526

527

528

529

530

533 const BlockFilterSet *BlockFilter);

534

536 BlockFilterSet *BlockFilter = nullptr);

537 bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,

540 const BlockFilterSet &LoopBlockSet);

542 const BlockFilterSet &LoopBlockSet);

546 const BlockFilterSet &LoopBlockSet);

549 const BlockFilterSet &LoopBlockSet);

551 const BlockFilterSet &LoopBlockSet);

553 const BlockFilterSet &LoopBlockSet,

555 BlockFilterSet collectLoopBlockSet(const MachineLoop &L);

556 void buildLoopChains(const MachineLoop &L);

557 void rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,

558 BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);

559 void rotateLoopWithProfile(BlockChain &LoopChain, const MachineLoop &L,

560 const BlockFilterSet &LoopBlockSet);

561 void buildCFGChains();

562 void optimizeBranches();

563 void alignBlocks();

564

565

567

568

572 const BlockFilterSet *BlockFilter);

573

574

577 const BlockChain &Chain, const BlockFilterSet *BlockFilter);

578

579

580 BlockAndTailDupResult getBestTrellisSuccessor(

584 const BlockFilterSet *BlockFilter);

585

586

587 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(

590

591

592

595 const BlockChain &Chain,

596 const BlockFilterSet *BlockFilter);

597

598

599

600 void precomputeTriangleChains();

601

602

603 void applyExtTsp(bool OptForSize);

604

605

606 void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);

607

608

609 void createCFGChainExtTsp();

610

611public:

612 static char ID;

613

616 }

617

619

620 bool allowTailDupPlacement() const {

622 return TailDupPlacement && F->getTarget().requiresStructuredCFG();

623 }

624

634 }

635};

636

637}

638

639char MachineBlockPlacement::ID = 0;

640

642

644 "Branch Probability Basic Block Placement", false, false)

652

653#ifndef NDEBUG

654

655

656

658 std::string Result;

661 OS << " ('" << BB->getName() << "')";

663 return Result;

664}

665#endif

666

667

668

669

670

671

672

673void MachineBlockPlacement::markChainSuccessors(

675 const BlockFilterSet *BlockFilter) {

676

677

679 markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);

680 }

681}

682

683

684

685

686

687

688

689void MachineBlockPlacement::markBlockSuccessors(

691 const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {

692

693

694

695

697 if (BlockFilter && !BlockFilter->count(Succ))

698 continue;

699 BlockChain &SuccChain = *BlockToChain[Succ];

700

701 if (&Chain == &SuccChain || Succ == LoopHeaderBB)

702 continue;

703

704

705

706 if (SuccChain.UnscheduledPredecessors == 0 ||

707 --SuccChain.UnscheduledPredecessors > 0)

708 continue;

709

710 auto *NewBB = *SuccChain.begin();

711 if (NewBB->isEHPad())

713 else

715 }

716}

717

718

719

720

721

724 const BlockFilterSet *BlockFilter,

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

744 bool SkipSucc = false;

745 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {

746 SkipSucc = true;

747 } else {

748 BlockChain *SuccChain = BlockToChain[Succ];

749 if (SuccChain == &Chain) {

750 SkipSucc = true;

751 } else if (Succ != *SuccChain->begin()) {

753 << " -> Mid chain!\n");

754 continue;

755 }

756 }

757 if (SkipSucc)

759 else

761 }

762

763 return AdjustedSumProb;

764}

765

766

767

774 if (SuccProbN >= SuccProbD)

776 else

778

779 return SuccProb;

780}

781

782

783static bool

787 return false;

788

789 if (Successors.count(&BB))

790 return false;

792 if (!Successors.count(Succ))

793 return false;

794 return true;

795}

796

797

798

799

800bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {

801

802

803

804 bool IsSimple = TailDup.isSimpleBB(BB);

805

807 return false;

809}

810

811

812

813

814

815

816

821 return (Gain / ThresholdProb) >= EntryFreq;

822}

823

824

825

826

827

828

829bool MachineBlockPlacement::isProfitableToTailDup(

832 const BlockFilterSet *BlockFilter) {

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

858

859 auto AdjustedSuccSumProb =

860 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);

862 auto BBFreq = MBFI->getBlockFreq(BB);

863 auto SuccFreq = MBFI->getBlockFreq(Succ);

867

868

869 if (SuccSuccs.size() == 0)

871

873

876 if (Prob > BestSuccSucc)

877 BestSuccSucc = Prob;

878 if (PDom == nullptr)

879 if (MPDT->dominates(SuccSucc, Succ)) {

880 PDom = SuccSucc;

881 break;

882 }

883 }

884

885

888 if (SuccPred == Succ || SuccPred == BB ||

889 BlockToChain[SuccPred] == &Chain ||

890 (BlockFilter && !BlockFilter->count(SuccPred)))

891 continue;

892 auto Freq =

893 MBFI->getBlockFreq(SuccPred) * MBPI->getEdgeProbability(SuccPred, Succ);

894 if (Freq > SuccBestPred)

895 SuccBestPred = Freq;

896 }

897

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918 if (PDom == nullptr || !Succ->isSuccessor(PDom)) {

925 BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;

927 }

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962 if (UProb > AdjustedSuccSumProb / 2 &&

963 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,

964 Chain, BlockFilter))

965

967 (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),

968 EntryFreq);

969

971 (Qout + std::min(Qin, F) * AdjustedSuccSumProb +

972 std::max(Qin, F) * UProb),

973 EntryFreq);

974}

975

976

977

978

979

980

981

982

983bool MachineBlockPlacement::isTrellis(

986 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {

987

988

989 if (BB->succ_size() != 2 || ViableSuccs.size() != 2)

990 return false;

991

994

996

998 int PredCount = 0;

999 for (auto *SuccPred : Succ->predecessors()) {

1000

1001 if (Successors.count(SuccPred)) {

1002

1004 if (!Successors.count(CheckSucc))

1005 return false;

1006 continue;

1007 }

1008 const BlockChain *PredChain = BlockToChain[SuccPred];

1009 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||

1010 PredChain == &Chain || PredChain == BlockToChain[Succ])

1011 continue;

1012 ++PredCount;

1013

1014 if (!SeenPreds.insert(SuccPred).second)

1015 continue;

1017 return false;

1018 }

1019

1020

1021 if (PredCount < 1)

1022 return false;

1023 }

1024 return true;

1025}

1026

1027

1028

1029

1030

1031

1032std::pair<MachineBlockPlacement::WeightedEdge,

1033 MachineBlockPlacement::WeightedEdge>

1034MachineBlockPlacement::getBestNonConflictingEdges(

1037 Edges) {

1038

1039

1040

1041

1042

1043

1044

1045 auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };

1046

1049 auto BestA = Edges[0].begin();

1050 auto BestB = Edges[1].begin();

1051

1052

1053 if (BestA->Src == BestB->Src) {

1054

1055 auto SecondBestA = std::next(BestA);

1056 auto SecondBestB = std::next(BestB);

1057 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;

1058 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;

1059 if (BestAScore < BestBScore)

1060 BestA = SecondBestA;

1061 else

1062 BestB = SecondBestB;

1063 }

1064

1065 if (BestB->Src == BB)

1067 return std::make_pair(*BestA, *BestB);

1068}

1069

1070

1071

1072

1073

1074

1075

1076

1077MachineBlockPlacement::BlockAndTailDupResult

1078MachineBlockPlacement::getBestTrellisSuccessor(

1082 const BlockFilterSet *BlockFilter) {

1083

1084 BlockAndTailDupResult Result = {nullptr, false};

1087

1088

1089

1090

1091 if (Successors.size() != 2 || ViableSuccs.size() != 2)

1093

1094

1096 int SuccIndex = 0;

1097 for (auto *Succ : ViableSuccs) {

1099

1100 if (SuccPred != BB)

1101 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||

1102 BlockToChain[SuccPred] == &Chain ||

1103 BlockToChain[SuccPred] == BlockToChain[Succ])

1104 continue;

1105 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *

1107 Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});

1108 }

1109 ++SuccIndex;

1110 }

1111

1112

1113 WeightedEdge BestA, BestB;

1114 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);

1115

1116 if (BestA.Src != BB) {

1117

1118

1119

1120 LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");

1122 }

1123

1124

1125

1126

1127 if (BestA.Dest == BestB.Src) {

1128

1129

1132

1133 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&

1134 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&

1135 isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),

1136 Chain, BlockFilter)) {

1140 << ", probability: " << Succ2Prob

1141 << " (Tail Duplicate)\n");

1143 Result.ShouldTailDup = true;

1145 }

1146 }

1147

1148

1149 ComputedEdges[BestB.Src] = {BestB.Dest, false};

1150

1151 auto TrellisSucc = BestA.Dest;

1155 << ", probability: " << SuccProb << " (Trellis)\n");

1156 Result.BB = TrellisSucc;

1158}

1159

1160

1161

1162

1163bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(

1165 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {

1166 if (!shouldTailDuplicate(Succ))

1167 return false;

1168

1169

1170 bool Duplicate = true;

1171

1172 unsigned int NumDup = 0;

1173

1174

1178

1179

1180

1181 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||

1182 (BlockToChain[Pred] == &Chain && !Succ->succ_empty()))

1183 continue;

1186

1187

1188

1189

1190

1191

1192

1193

1194

1195

1196

1197

1198

1199

1200

1201

1202

1203

1204

1205

1206

1207

1208

1209

1210

1211

1212

1213

1214

1215

1216

1217

1218

1219 continue;

1220 Duplicate = false;

1221 continue;

1222 }

1223 NumDup++;

1224 }

1225

1226

1227 if (NumDup == 0)

1228 return false;

1229

1230

1231

1232 if (F->getFunction().hasProfileData())

1233 return true;

1234

1235

1236

1237

1238

1240 return true;

1241

1242

1243 NumDup++;

1244

1245

1246

1247

1248

1249

1250

1251

1252

1253

1254

1255

1256

1257

1258

1259

1260

1261

1262

1263 if ((NumDup > Succ->succ_size()) || !Duplicate)

1264 return false;

1265

1266 return true;

1267}

1268

1269

1270

1271

1272

1273

1274

1275

1276

1277

1278

1279

1280

1281

1282

1283

1284

1285

1286void MachineBlockPlacement::precomputeTriangleChains() {

1287 struct TriangleChain {

1288 std::vector<MachineBasicBlock *> Edges;

1289

1291 : Edges({src, dst}) {}

1292

1294 assert(getKey()->isSuccessor(dst) &&

1295 "Attempting to append a block that is not a successor.");

1296 Edges.push_back(dst);

1297 }

1298

1299 unsigned count() const { return Edges.size() - 1; }

1300

1302 };

1303

1305 return;

1306

1307 LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");

1308

1309

1312

1314 continue;

1317 if (!MPDT->dominates(Succ, &BB))

1318 continue;

1319 PDom = Succ;

1320 break;

1321 }

1322

1323

1324 if (PDom == nullptr)

1325 continue;

1326

1328 continue;

1329

1330

1331 if (!shouldTailDuplicate(PDom))

1332 continue;

1333 bool CanTailDuplicate = true;

1334

1335

1337 if (Pred == &BB)

1338 continue;

1340 CanTailDuplicate = false;

1341 break;

1342 }

1343 }

1344

1345

1346 if (!CanTailDuplicate)

1347 continue;

1348

1349

1350

1351

1352

1353 auto Found = TriangleChainMap.find(&BB);

1354

1355

1356 if (Found != TriangleChainMap.end()) {

1357 TriangleChain Chain = std::move(Found->second);

1358 TriangleChainMap.erase(Found);

1359 Chain.append(PDom);

1360 TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));

1361 } else {

1362 auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);

1363 assert(InsertResult.second && "Block seen twice.");

1364 (void)InsertResult;

1365 }

1366 }

1367

1368

1369

1370

1371 for (auto &ChainPair : TriangleChainMap) {

1372 TriangleChain &Chain = ChainPair.second;

1373

1374

1375

1377 continue;

1379 Chain.Edges.pop_back();

1383 << " as pre-computed based on triangles.\n");

1384

1385 auto InsertResult = ComputedEdges.insert({src, {dst, true}});

1386 assert(InsertResult.second && "Block seen twice.");

1387 (void)InsertResult;

1388

1389 dst = src;

1390 }

1391 }

1392}

1393

1394

1395

1404

1405

1406

1407

1408

1409

1410

1411

1412

1413

1415 }

1416 }

1418}

1419

1420

1421

1422

1423

1424

1425

1426

1427

1428bool MachineBlockPlacement::hasBetterLayoutPredecessor(

1432 const BlockFilterSet *BlockFilter) {

1433

1434

1435 if (SuccChain.UnscheduledPredecessors == 0)

1436 return false;

1437

1438

1439

1440

1441

1442

1443

1444

1445

1446

1447

1448

1449

1450

1451

1452

1453

1454

1455

1456

1457

1458

1459

1460

1461

1462

1463

1464

1465

1466

1467

1468

1469

1470

1471

1472

1473

1474

1475

1476

1477

1478

1479

1480

1481

1482

1483

1484

1485

1486

1487

1488

1489

1490

1491

1492

1493

1494

1495

1496

1497

1498

1499

1500

1501

1502

1503

1504

1505

1506

1507

1508

1509

1510

1511

1512

1513

1514

1515

1516

1517

1518

1519

1520

1521

1522

1523

1524

1525

1526

1527

1528

1529

1530

1531

1532

1533

1534

1535

1536

1537

1538

1539

1540

1541

1542

1543

1544

1545

1546

1547

1548

1549

1550

1552

1553

1554

1555 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;

1556 bool BadCFGConflict = false;

1557

1559 BlockChain *PredChain = BlockToChain[Pred];

1560 if (Pred == Succ || PredChain == &SuccChain ||

1561 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||

1562 Pred != *std::prev(PredChain->end()) ||

1563

1564

1565

1566 (Pred == BB))

1567 continue;

1568

1569

1570

1571

1572

1573

1574

1575

1576

1577

1578

1579

1580

1583 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {

1584 BadCFGConflict = true;

1585 break;

1586 }

1587 }

1588

1589 if (BadCFGConflict) {

1591 << SuccProb << " (prob) (non-cold CFG conflict)\n");

1592 return true;

1593 }

1594

1595 return false;

1596}

1597

1598

1599

1600

1601

1602

1603

1604

1605

1606

1607

1608MachineBlockPlacement::BlockAndTailDupResult

1609MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,

1610 const BlockChain &Chain,

1611 const BlockFilterSet *BlockFilter) {

1613

1614 BlockAndTailDupResult BestSucc = {nullptr, false};

1616

1618 auto AdjustedSumProb =

1619 collectViableSuccessors(BB, Chain, BlockFilter, Successors);

1620

1622 << "\n");

1623

1624

1625

1626 auto FoundEdge = ComputedEdges.find(BB);

1627 if (FoundEdge != ComputedEdges.end()) {

1629 ComputedEdges.erase(FoundEdge);

1630 BlockChain *SuccChain = BlockToChain[Succ];

1631 if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&

1632 SuccChain != &Chain && Succ == *SuccChain->begin())

1633 return FoundEdge->second;

1634 }

1635

1636

1637

1638 if (isTrellis(BB, Successors, Chain, BlockFilter))

1639 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,

1640 BlockFilter);

1641

1642

1643

1644

1646 DupCandidates;

1651

1652 BlockChain &SuccChain = *BlockToChain[Succ];

1653

1654

1655 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,

1656 Chain, BlockFilter)) {

1657

1658 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))

1660 continue;

1661 }

1662

1665 << ", probability: " << SuccProb

1666 << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")

1667 << "\n");

1668

1669 if (BestSucc.BB && BestProb >= SuccProb) {

1670 LLVM_DEBUG(dbgs() << " Not the best candidate, continuing\n");

1671 continue;

1672 }

1673

1674 LLVM_DEBUG(dbgs() << " Setting it as best candidate\n");

1675 BestSucc.BB = Succ;

1676 BestProb = SuccProb;

1677 }

1678

1679

1680

1681

1682

1684 [](std::tuple<BranchProbability, MachineBasicBlock *> L,

1685 std::tuple<BranchProbability, MachineBasicBlock *> R) {

1686 return std::get<0>(L) > std::get<0>(R);

1687 });

1688 for (auto &Tup : DupCandidates) {

1691 std::tie(DupProb, Succ) = Tup;

1692 if (DupProb < BestProb)

1693 break;

1694 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&

1695 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {

1697 << ", probability: " << DupProb

1698 << " (Tail Duplicate)\n");

1699 BestSucc.BB = Succ;

1700 BestSucc.ShouldTailDup = true;

1701 break;

1702 }

1703 }

1704

1705 if (BestSucc.BB)

1707

1708 return BestSucc;

1709}

1710

1711

1712

1713

1714

1715

1716

1717

1718

1719

1720

1721MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(

1723

1724

1725

1726

1728 return BlockToChain.lookup(BB) == &Chain;

1729 });

1730

1731 if (WorkList.empty())

1732 return nullptr;

1733

1734 bool IsEHPad = WorkList[0]->isEHPad();

1735

1740 "EHPad mismatch between block and work list.");

1741

1742 BlockChain &SuccChain = *BlockToChain[MBB];

1743 if (&SuccChain == &Chain)

1744 continue;

1745

1746 assert(SuccChain.UnscheduledPredecessors == 0 &&

1747 "Found CFG-violating block");

1748

1752 << " (freq)\n");

1753

1754

1755

1756

1757

1758

1759

1760

1761

1762

1763

1764

1765

1766

1767

1768

1769

1770

1771

1772 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))

1773 continue;

1774

1775 BestBlock = MBB;

1776 BestFreq = CandidateFreq;

1777 }

1778

1779 return BestBlock;

1780}

1781

1782

1783

1784

1785

1786

1787

1788

1790 const BlockChain &PlacedChain,

1792

1794 ++I) {

1795 if (BlockToChain[&*I] != &PlacedChain) {

1796 PrevUnplacedBlockIt = I;

1797

1798

1799

1800 return *BlockToChain[&*I]->begin();

1801 }

1802 }

1803 return nullptr;

1804}

1805

1806

1807

1808

1809

1810

1811

1812

1813

1814

1815

1817 const BlockChain &PlacedChain,

1818 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,

1819 const BlockFilterSet *BlockFilter) {

1820 assert(BlockFilter);

1821 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();

1822 ++PrevUnplacedBlockInFilterIt) {

1823 BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];

1824 if (C != &PlacedChain) {

1825 return *C->begin();

1826 }

1827 }

1828 return nullptr;

1829}

1830

1831void MachineBlockPlacement::fillWorkLists(

1833 const BlockFilterSet *BlockFilter = nullptr) {

1834 BlockChain &Chain = *BlockToChain[MBB];

1835 if (!UpdatedPreds.insert(&Chain).second)

1836 return;

1837

1839 Chain.UnscheduledPredecessors == 0 &&

1840 "Attempting to place block with unscheduled predecessors in worklist.");

1842 assert(BlockToChain[ChainBB] == &Chain &&

1843 "Block in chain doesn't match BlockToChain map.");

1845 if (BlockFilter && !BlockFilter->count(Pred))

1846 continue;

1847 if (BlockToChain[Pred] == &Chain)

1848 continue;

1849 ++Chain.UnscheduledPredecessors;

1850 }

1851 }

1852

1853 if (Chain.UnscheduledPredecessors != 0)

1854 return;

1855

1859 else

1861}

1862

1863void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,

1864 BlockChain &Chain,

1865 BlockFilterSet *BlockFilter) {

1866 assert(HeadBB && "BB must not be null.\n");

1867 assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");

1869 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;

1870 if (BlockFilter)

1871 PrevUnplacedBlockInFilterIt = BlockFilter->begin();

1872

1874 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);

1876 while (true) {

1877 assert(BB && "null block found at end of chain in loop.");

1878 assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");

1879 assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");

1880

1881

1882

1883 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);

1885 bool ShouldTailDup = Result.ShouldTailDup;

1886 if (allowTailDupPlacement())

1887 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(

1888 BB, BestSucc, Chain, BlockFilter));

1889

1890

1891

1892

1893 if (!BestSucc)

1894 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);

1895 if (!BestSucc)

1896 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);

1897

1898 if (!BestSucc) {

1899 if (BlockFilter)

1900 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,

1901 BlockFilter);

1902 else

1903 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);

1904 if (!BestSucc)

1905 break;

1906

1907 LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "

1908 "layout successor until the CFG reduces\n");

1909 }

1910

1911

1912

1913 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {

1914 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,

1915 BlockFilter, PrevUnplacedBlockIt,

1916 PrevUnplacedBlockInFilterIt);

1917

1918

1920 continue;

1921 }

1922

1923

1924 BlockChain &SuccChain = *BlockToChain[BestSucc];

1925

1926

1927 SuccChain.UnscheduledPredecessors = 0;

1930 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);

1931 Chain.merge(BestSucc, &SuccChain);

1932 BB = *std::prev(Chain.end());

1933 }

1934

1935 LLVM_DEBUG(dbgs() << "Finished forming chain for header block "

1937}

1938

1939

1940

1941

1942

1943

1944

1945

1946

1947

1948

1949

1950

1951

1952

1953bool MachineBlockPlacement::canMoveBottomBlockToTop(

1955 if (BottomBlock->pred_size() != 1)

1956 return true;

1959 return true;

1960

1962 if (OtherBB == BottomBlock)

1964 if (OtherBB == OldTop)

1965 return false;

1966

1967 return true;

1968}

1969

1970

1972MachineBlockPlacement::TopFallThroughFreq(const MachineBasicBlock *Top,

1973 const BlockFilterSet &LoopBlockSet) {

1976 BlockChain *PredChain = BlockToChain[Pred];

1977 if (!LoopBlockSet.count(Pred) &&

1978 (!PredChain || Pred == *std::prev(PredChain->end()))) {

1979

1980

1982 bool TopOK = true;

1985 BlockChain *SuccChain = BlockToChain[Succ];

1986

1987

1988 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&

1989 (!SuccChain || Succ == *SuccChain->begin())) {

1990 TopOK = false;

1991 break;

1992 }

1993 }

1994 if (TopOK) {

1997 if (EdgeFreq > MaxFreq)

1998 MaxFreq = EdgeFreq;

1999 }

2000 }

2001 }

2002 return MaxFreq;

2003}

2004

2005

2006

2007

2008

2009

2010

2011

2012

2013

2014

2015

2016

2017

2018

2019

2020

2021

2022

2023

2024

2025

2026BlockFrequency MachineBlockPlacement::FallThroughGains(

2028 const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) {

2029 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);

2031 if (ExitBB)

2032 FallThrough2Exit =

2033 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, ExitBB);

2035 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, OldTop);

2036

2037

2041 if (!LoopBlockSet.count(Pred))

2042 continue;

2043 BlockChain *PredChain = BlockToChain[Pred];

2044 if (!PredChain || Pred == *std::prev(PredChain->end())) {

2047 if (EdgeFreq > FallThroughFromPred) {

2048 FallThroughFromPred = EdgeFreq;

2049 BestPred = Pred;

2050 }

2051 }

2052 }

2053

2054

2055

2057 if (BestPred) {

2059 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))

2060 continue;

2061 if (ComputedEdges.contains(Succ))

2062 continue;

2063 BlockChain *SuccChain = BlockToChain[Succ];

2064 if ((SuccChain && (Succ != *SuccChain->begin())) ||

2065 (SuccChain == BlockToChain[BestPred]))

2066 continue;

2067 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *

2069 if (EdgeFreq > NewFreq)

2070 NewFreq = EdgeFreq;

2071 }

2072 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *

2074 if (NewFreq > OrigEdgeFreq) {

2075

2076

2077

2080 }

2081 }

2082

2086 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;

2087 if (Gains > Lost)

2088 Result = Gains - Lost;

2090}

2091

2092

2093

2094

2095

2096

2097

2098

2099

2100

2101

2102

2103

2104

2105

2106

2107

2108

2109

2110

2111

2112

2113

2116 const BlockFilterSet &LoopBlockSet) {

2117

2118

2119

2120 BlockChain &HeaderChain = *BlockToChain[OldTop];

2121 if (!LoopBlockSet.count(*HeaderChain.begin()))

2122 return OldTop;

2123 if (OldTop != *HeaderChain.begin())

2124 return OldTop;

2125

2127 << "\n");

2128

2132 if (!LoopBlockSet.count(Pred))

2133 continue;

2134 if (Pred == L.getHeader())

2135 continue;

2137 << Pred->succ_size() << " successors, "

2138 << printBlockFreq(MBFI->getMBFI(), *Pred) << " freq\n");

2140 continue;

2141

2145 if (OtherBB == OldTop)

2147 }

2148

2149 if (!canMoveBottomBlockToTop(Pred, OldTop))

2150 continue;

2151

2153 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);

2155 (Gains > BestGains ||

2157 BestPred = Pred;

2158 BestGains = Gains;

2159 }

2160 }

2161

2162

2163 if (!BestPred) {

2165 return OldTop;

2166 }

2167

2168

2169 while (BestPred->pred_size() == 1 &&

2170 (*BestPred->pred_begin())->succ_size() == 1 &&

2171 *BestPred->pred_begin() != L.getHeader())

2173

2175 return BestPred;

2176}

2177

2178

2179

2180

2181

2183MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,

2184 const BlockFilterSet &LoopBlockSet) {

2185

2186

2187

2188

2189

2190

2191

2193 return L.getHeader();

2194

2197 while (NewTop != OldTop) {

2198 OldTop = NewTop;

2199 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);

2200 if (NewTop != OldTop)

2201 ComputedEdges[NewTop] = {OldTop, false};

2202 }

2203 return NewTop;

2204}

2205

2206

2207

2208

2209

2210

2212MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,

2213 const BlockFilterSet &LoopBlockSet,

2215

2216

2217

2218

2219

2220

2221

2222

2223 BlockChain &HeaderChain = *BlockToChain[L.getHeader()];

2224 if (!LoopBlockSet.count(*HeaderChain.begin()))

2225 return nullptr;

2226

2228 unsigned BestExitLoopDepth = 0;

2230

2231

2232

2234

2238 BlockChain &Chain = *BlockToChain[MBB];

2239

2240

2241 if (MBB != *std::prev(Chain.end()))

2242 continue;

2243

2244

2245

2246

2247

2249 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;

2250 bool HasLoopingSucc = false;

2253 continue;

2254 if (Succ == MBB)

2255 continue;

2256 BlockChain &SuccChain = *BlockToChain[Succ];

2257

2258 if (&Chain == &SuccChain) {

2260 << getBlockName(Succ) << " (chain conflict)\n");

2261 continue;

2262 }

2263

2265 if (LoopBlockSet.count(Succ)) {

2267 << getBlockName(Succ) << " (" << SuccProb << ")\n");

2268 HasLoopingSucc = true;

2269 continue;

2270 }

2271

2272 unsigned SuccLoopDepth = 0;

2274 SuccLoopDepth = ExitLoop->getLoopDepth();

2275 if (ExitLoop->contains(&L))

2276 BlocksExitingToOuterLoop.insert(MBB);

2277 }

2278

2279 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;

2282 << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("

2283 << printBlockFreq(MBFI->getMBFI(), ExitEdgeFreq) << ")\n");

2284

2285

2286

2287

2289 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||

2290 ExitEdgeFreq > BestExitEdgeFreq ||

2292 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {

2293 BestExitEdgeFreq = ExitEdgeFreq;

2294 ExitingBB = MBB;

2295 }

2296 }

2297

2298 if (!HasLoopingSucc) {

2299

2300 ExitingBB = OldExitingBB;

2301 BestExitEdgeFreq = OldBestExitEdgeFreq;

2302 }

2303 }

2304

2305

2306 if (!ExitingBB) {

2308 dbgs() << " No other candidate exit blocks, using loop header\n");

2309 return nullptr;

2310 }

2311 if (L.getNumBlocks() == 1) {

2312 LLVM_DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");

2313 return nullptr;

2314 }

2315

2316

2317

2318

2319 if (!BlocksExitingToOuterLoop.empty() &&

2320 !BlocksExitingToOuterLoop.count(ExitingBB))

2321 return nullptr;

2322

2324 << "\n");

2325 ExitFreq = BestExitEdgeFreq;

2326 return ExitingBB;

2327}

2328

2329

2330

2331

2332

2333bool MachineBlockPlacement::hasViableTopFallthrough(

2334 const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) {

2336 BlockChain *PredChain = BlockToChain[Pred];

2337 if (!LoopBlockSet.count(Pred) &&

2338 (!PredChain || Pred == *std::prev(PredChain->end()))) {

2339

2340

2342 bool TopOK = true;

2345 BlockChain *SuccChain = BlockToChain[Succ];

2346

2347

2348 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {

2349 TopOK = false;

2350 break;

2351 }

2352 }

2353 if (TopOK)

2354 return true;

2355 }

2356 }

2357 return false;

2358}

2359

2360

2361

2362

2363

2364

2365

2366void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,

2369 const BlockFilterSet &LoopBlockSet) {

2370 if (!ExitingBB)

2371 return;

2372

2375

2376

2377 if (Bottom == ExitingBB)

2378 return;

2379

2380

2382 return;

2383

2384 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);

2385

2386

2387

2388

2389 if (ViableTopFallthrough) {

2391 BlockChain *SuccChain = BlockToChain[Succ];

2392 if (!LoopBlockSet.count(Succ) &&

2393 (!SuccChain || Succ == *SuccChain->begin()))

2394 return;

2395 }

2396

2397

2398

2399 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);

2400 if (FallThrough2Top >= ExitFreq)

2401 return;

2402 }

2403

2404 BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);

2405 if (ExitIt == LoopChain.end())

2406 return;

2407

2408

2409

2410

2411

2412

2413

2414

2415

2416

2417

2418

2419

2420

2421

2422

2423

2424

2425

2426

2427 if (ViableTopFallthrough) {

2428 assert(std::next(ExitIt) != LoopChain.end() &&

2429 "Exit should not be last BB");

2431 if (ExitingBB->isSuccessor(NextBlockInChain))

2433 return;

2434 }

2435

2437 << " at bottom\n");

2438 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());

2439}

2440

2441

2442

2443

2444

2445

2446

2447

2448

2449

2450

2451

2452

2453

2454void MachineBlockPlacement::rotateLoopWithProfile(

2455 BlockChain &LoopChain, const MachineLoop &L,

2456 const BlockFilterSet &LoopBlockSet) {

2457 auto RotationPos = LoopChain.end();

2459

2460

2462 return;

2463

2465

2466

2467

2470 if (Scale == 0)

2472

2473

2475 };

2476

2477

2478

2479

2481 for (auto *Pred : ChainHeaderBB->predecessors()) {

2482 BlockChain *PredChain = BlockToChain[Pred];

2483 if (!LoopBlockSet.count(Pred) &&

2484 (!PredChain || Pred == *std::prev(PredChain->end()))) {

2485 auto EdgeFreq = MBFI->getBlockFreq(Pred) *

2487 auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);

2488

2489

2491 FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);

2492 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);

2493 }

2494 }

2495

2496

2497

2498

2499

2501 for (auto *BB : LoopChain) {

2503 for (auto *Succ : BB->successors()) {

2504 BlockChain *SuccChain = BlockToChain[Succ];

2505 if (!LoopBlockSet.count(Succ) &&

2506 (!SuccChain || Succ == *SuccChain->begin())) {

2508 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);

2509 }

2510 }

2512 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;

2514 }

2515 }

2516

2517

2518

2519

2520 for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),

2521 EndIter = LoopChain.end();

2522 Iter != EndIter; Iter++, TailIter++) {

2523

2524

2525 if (TailIter == LoopChain.end())

2526 TailIter = LoopChain.begin();

2527

2528 auto TailBB = *TailIter;

2529

2530

2532

2533

2534

2535

2536 if (Iter != LoopChain.begin())

2537 Cost += HeaderFallThroughCost;

2538

2539

2540

2541 for (auto &ExitWithFreq : ExitsWithFreq)

2542 if (TailBB != ExitWithFreq.first)

2543 Cost += ExitWithFreq.second;

2544

2545

2546

2547

2548

2549

2550

2551

2552

2553

2554

2555

2556

2557

2558

2559 if (TailBB->isSuccessor(*Iter)) {

2560 auto TailBBFreq = MBFI->getBlockFreq(TailBB);

2561 if (TailBB->succ_size() == 1)

2563 else if (TailBB->succ_size() == 2) {

2565 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;

2567 ? TailBBFreq * TailToHeadProb.getCompl()

2568 : TailToHeadFreq;

2570 ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);

2571 }

2572 }

2573

2574 LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "

2577

2578 if (Cost < SmallestRotationCost) {

2579 SmallestRotationCost = Cost;

2580 RotationPos = Iter;

2581 }

2582 }

2583

2584 if (RotationPos != LoopChain.end()) {

2586 << " to the top\n");

2587 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());

2588 }

2589}

2590

2591

2592

2593

2594

2596MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {

2597

2598

2599 struct MBBCompare {

2602 return X->getNumber() < Y->getNumber();

2603 }

2604 };

2605 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;

2606

2607

2608

2609

2610

2611

2612

2613

2614

2615

2618 for (auto *LoopPred : L.getHeader()->predecessors())

2619 if (L.contains(LoopPred))

2620 LoopFreq += MBFI->getBlockFreq(LoopPred) *

2622

2624 if (LoopBlockSet.count(LoopBB))

2625 continue;

2626 auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();

2628 continue;

2629 BlockChain *Chain = BlockToChain[LoopBB];

2631 LoopBlockSet.insert(ChainBB);

2632 }

2633 } else

2634 LoopBlockSet.insert(L.block_begin(), L.block_end());

2635

2636

2637

2638

2639 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());

2640 return Ret;

2641}

2642

2643

2644

2645

2646

2647

2648

2649void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {

2650

2651

2653 buildLoopChains(*InnerLoop);

2654

2656 "BlockWorkList not empty when starting to build loop chains.");

2658 "EHPadWorkList not empty when starting to build loop chains.");

2659 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);

2660

2661

2662

2663

2664 bool RotateLoopWithProfile =

2667

2668

2669

2670

2671

2673

2674

2675

2676

2677

2678

2679

2680 PreferredLoopExit = nullptr;

2682 if (!RotateLoopWithProfile && LoopTop == L.getHeader())

2683 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);

2684

2685 BlockChain &LoopChain = *BlockToChain[LoopTop];

2686

2687

2688

2689

2691 assert(LoopChain.UnscheduledPredecessors == 0 &&

2692 "LoopChain should not have unscheduled predecessors.");

2693 UpdatedPreds.insert(&LoopChain);

2694

2696 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);

2697

2698 buildChain(LoopTop, LoopChain, &LoopBlockSet);

2699

2700 if (RotateLoopWithProfile)

2701 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);

2702 else

2703 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);

2704

2706

2707 bool BadLoop = false;

2708 if (LoopChain.UnscheduledPredecessors) {

2709 BadLoop = true;

2710 dbgs() << "Loop chain contains a block without its preds placed!\n"

2711 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"

2712 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";

2713 }

2716 if (!LoopBlockSet.remove(ChainBB)) {

2717

2718

2719

2720 dbgs() << "Loop chain contains a block not contained by the loop!\n"

2721 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"

2722 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"

2723 << " Bad block: " << getBlockName(ChainBB) << "\n";

2724 }

2725 }

2726

2727 if (!LoopBlockSet.empty()) {

2728 BadLoop = true;

2730 dbgs() << "Loop contains blocks never placed into a chain!\n"

2731 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"

2732 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"

2733 << " Bad block: " << getBlockName(LoopBB) << "\n";

2734 }

2735 assert(!BadLoop && "Detected problems with the placement of this loop.");

2736 });

2737

2738 BlockWorkList.clear();

2739 EHPadWorkList.clear();

2740}

2741

2742void MachineBlockPlacement::buildCFGChains() {

2743

2744

2747 ++FI) {

2749 BlockChain *Chain =

2750 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);

2751

2752

2753 while (true) {

2754 Cond.clear();

2757 break;

2758

2761

2762

2763 assert(NextFI != FE && "Can't fallthrough past the last block.");

2764 LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "

2766 << "\n");

2767 Chain->merge(NextBB, nullptr);

2768#ifndef NDEBUG

2769 BlocksWithUnanalyzableExits.insert(&*BB);

2770#endif

2771 FI = NextFI;

2772 BB = NextBB;

2773 }

2774 }

2775

2776

2777 PreferredLoopExit = nullptr;

2779 buildLoopChains(*L);

2780

2782 "BlockWorkList should be empty before building final chain.");

2784 "EHPadWorkList should be empty before building final chain.");

2785

2788 fillWorkLists(&MBB, UpdatedPreds);

2789

2790 BlockChain &FunctionChain = *BlockToChain[&F->front()];

2791 buildChain(&F->front(), FunctionChain);

2792

2793#ifndef NDEBUG

2795#endif

2797

2798 bool BadFunc = false;

2799 FunctionBlockSetType FunctionBlockSet;

2801 FunctionBlockSet.insert(&MBB);

2802

2804 if (!FunctionBlockSet.erase(ChainBB)) {

2805 BadFunc = true;

2806 dbgs() << "Function chain contains a block not in the function!\n"

2807 << " Bad block: " << getBlockName(ChainBB) << "\n";

2808 }

2809

2810 if (!FunctionBlockSet.empty()) {

2811 BadFunc = true;

2812 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)

2813 dbgs() << "Function contains blocks never placed into a chain!\n"

2814 << " Bad block: " << getBlockName(RemainingBB) << "\n";

2815 }

2816 assert(!BadFunc && "Detected problems with the block placement.");

2817 });

2818

2819

2820

2822 F->getNumBlockIDs());

2823 {

2825 for (auto &MBB : *F) {

2826 if (LastMBB != nullptr)

2827 OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB;

2828 LastMBB = &MBB;

2829 }

2830 OriginalLayoutSuccessors[F->back().getNumber()] = nullptr;

2831 }

2832

2833

2835 LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");

2837 LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "

2838 : " ... ")

2841 F->splice(InsertPos, ChainBB);

2842 else

2843 ++InsertPos;

2844

2845

2846 if (ChainBB == *FunctionChain.begin())

2847 continue;

2849

2850

2851

2852

2853 Cond.clear();

2855

2856#ifndef NDEBUG

2857 if (!BlocksWithUnanalyzableExits.count(PrevBB)) {

2858

2859

2860

2863 "Unexpected block with un-analyzable fallthrough!");

2864 Cond.clear();

2865 TBB = FBB = nullptr;

2866 }

2867#endif

2868

2869

2870

2871

2872

2873

2874

2875

2876

2877

2878

2879

2880

2881

2882

2883

2884

2885

2886

2887

2888

2889

2892 }

2893 }

2894

2895

2896 Cond.clear();

2901 }

2902

2903 BlockWorkList.clear();

2904 EHPadWorkList.clear();

2905}

2906

2907void MachineBlockPlacement::optimizeBranches() {

2908 BlockChain &FunctionChain = *BlockToChain[&F->front()];

2910

2911

2912

2913

2914

2915

2916

2918 Cond.clear();

2921 continue;

2922 if (TBB || !FBB || Cond.empty())

2923 continue;

2924

2925

2926

2928 continue;

2929

2930

2933 continue;

2935 continue;

2936 LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "

2939 << "\n");

2940 auto Dl = ChainBB->findBranchDebugLoc();

2943 }

2944}

2945

2946void MachineBlockPlacement::alignBlocks() {

2947

2948

2949

2950

2951

2953 if (F->getFunction().hasMinSize() ||

2955 return;

2956 }

2957

2958 BlockChain &FunctionChain = *BlockToChain[&F->front()];

2959

2960 if (FunctionChain.begin() == FunctionChain.end())

2961 return;

2962

2964 BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());

2965 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;

2967 if (ChainBB == *FunctionChain.begin())

2968 continue;

2969

2970

2971

2972

2973

2975 if (!L)

2976 continue;

2977

2979 unsigned MDAlign = 1;

2980 MDNode *LoopID = L->getLoopID();

2981 if (LoopID) {

2983 MDNode *MD = dyn_cast(MDO);

2984 if (MD == nullptr)

2985 continue;

2987 if (S == nullptr)

2988 continue;

2989 if (S->getString() == "llvm.loop.align") {

2991 "per-loop align metadata should have two operands.");

2992 MDAlign =

2993 mdconst::extract(MD->getOperand(1))->getZExtValue();

2994 assert(MDAlign >= 1 && "per-loop align value must be positive.");

2995 }

2996 }

2997 }

2998

2999

3000 const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));

3001 if (LoopAlign == 1)

3002 continue;

3003

3004

3005

3007 if (Freq < WeightedEntryFreq)

3008 continue;

3009

3010

3011

3013 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);

3014 if (Freq < (LoopHeaderFreq * ColdProb))

3015 continue;

3016

3017

3020 continue;

3021

3022

3023

3026

3027 auto DetermineMaxAlignmentPadding = [&]() {

3028

3029 unsigned MaxBytes;

3032 else

3034 ChainBB->setMaxBytesForAlignment(MaxBytes);

3035 };

3036

3037

3038

3039 if (!LayoutPred->isSuccessor(ChainBB)) {

3040 ChainBB->setAlignment(LoopAlign);

3041 DetermineMaxAlignmentPadding();

3042 continue;

3043 }

3044

3045

3046

3047

3048

3051 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;

3052 if (LayoutEdgeFreq <= (Freq * ColdProb)) {

3053 ChainBB->setAlignment(LoopAlign);

3054 DetermineMaxAlignmentPadding();

3055 }

3056 }

3057

3058 const bool HasMaxBytesOverride =

3060

3062

3064 if (HasMaxBytesOverride)

3067 else

3069 }

3071

3072

3073 for (auto MBI = std::next(F->begin()), MBE = F->end(); MBI != MBE; ++MBI) {

3074 auto LayoutPred = std::prev(MBI);

3076 if (HasMaxBytesOverride)

3079 else

3081 }

3082 }

3083 }

3084}

3085

3086

3087

3088

3089

3090

3091

3092

3093

3094

3095

3096

3097

3098

3099

3100

3101bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(

3105 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {

3106 bool Removed, DuplicatedToLPred;

3107 bool DuplicatedToOriginalLPred;

3108 Removed = maybeTailDuplicateBlock(

3109 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,

3110 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);

3111 if (!Removed)

3112 return false;

3113 DuplicatedToOriginalLPred = DuplicatedToLPred;

3114

3115

3116

3117

3118 while (DuplicatedToLPred && Removed) {

3120

3121

3122

3123

3124

3125 BlockChain::iterator ChainEnd = Chain.end();

3126 DupBB = *(--ChainEnd);

3127

3128 if (ChainEnd == Chain.begin())

3129 break;

3130 DupPred = *std::prev(ChainEnd);

3131 Removed = maybeTailDuplicateBlock(

3132 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,

3133 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);

3134 }

3135

3136

3137

3138

3139

3140 LPred = *std::prev(Chain.end());

3141 if (DuplicatedToOriginalLPred)

3142 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);

3143 return true;

3144}

3145

3146

3147

3148

3149

3150

3151

3152

3153

3154

3155

3156

3157

3158

3159bool MachineBlockPlacement::maybeTailDuplicateBlock(

3162 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,

3163 bool &DuplicatedToLPred) {

3164 DuplicatedToLPred = false;

3165 if (!shouldTailDuplicate(BB))

3166 return false;

3167

3169 << "\n");

3170

3171

3172

3173 bool Removed = false;

3175

3176 Removed = true;

3177

3178

3179 bool InWorkList = true;

3180

3181 if (BlockToChain.count(RemBB)) {

3182 BlockChain *Chain = BlockToChain[RemBB];

3183 InWorkList = Chain->UnscheduledPredecessors == 0;

3184 Chain->remove(RemBB);

3185 BlockToChain.erase(RemBB);

3186 }

3187

3188

3189 if (&(*PrevUnplacedBlockIt) == RemBB) {

3190 PrevUnplacedBlockIt++;

3191 }

3192

3193

3194 if (InWorkList) {

3196 if (RemBB->isEHPad())

3197 RemoveList = EHPadWorkList;

3199 }

3200

3201

3202 if (BlockFilter) {

3203 auto It = llvm::find(*BlockFilter, RemBB);

3204

3205

3206 if (It != BlockFilter->end()) {

3207 if (It < PrevUnplacedBlockInFilterIt) {

3209

3210

3211 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;

3212 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance;

3213 assert(*PrevUnplacedBlockInFilterIt == PrevBB);

3214 (void)PrevBB;

3215 } else if (It == PrevUnplacedBlockInFilterIt)

3216

3217

3218 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);

3219 else

3220 BlockFilter->erase(It);

3221 }

3222 }

3223

3224

3225 MLI->removeBlock(RemBB);

3226 if (RemBB == PreferredLoopExit)

3227 PreferredLoopExit = nullptr;

3228

3230 << "\n");

3231 };

3232 auto RemovalCallbackRef =

3234

3236 bool IsSimple = TailDup.isSimpleBB(BB);

3239 if (F->getFunction().hasProfileData()) {

3240

3241 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);

3242 if (CandidatePreds.size() == 0)

3243 return false;

3245 CandidatePtr = &CandidatePreds;

3246 }

3248 &RemovalCallbackRef, CandidatePtr);

3249

3250

3251 DuplicatedToLPred = false;

3253

3254 BlockChain *PredChain = BlockToChain[Pred];

3255 if (Pred == LPred)

3256 DuplicatedToLPred = true;

3257 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||

3258 PredChain == &Chain)

3259 continue;

3261 if (BlockFilter && !BlockFilter->count(NewSucc))

3262 continue;

3263 BlockChain *NewChain = BlockToChain[NewSucc];

3264 if (NewChain != &Chain && NewChain != PredChain)

3265 NewChain->UnscheduledPredecessors++;

3266 }

3267 }

3268 return Removed;

3269}

3270

3271

3275 if (MI.isPHI() && MI.isMetaInstruction())

3277 }

3279}

3280

3281

3282

3283

3286}

3287

3288

3289bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,

3291 BlockFilterSet *BlockFilter) {

3292 if (BB == Pred)

3293 return false;

3294 if (BlockFilter && !BlockFilter->count(Pred))

3295 return false;

3296 BlockChain *PredChain = BlockToChain[Pred];

3297 if (PredChain && (Pred != *std::prev(PredChain->end())))

3298 return false;

3299

3300

3303 if (Succ != BB) {

3304 if (BlockFilter && !BlockFilter->count(Succ))

3305 continue;

3306 BlockChain *SuccChain = BlockToChain[Succ];

3307 if (SuccChain && (Succ != *SuccChain->begin()))

3308 continue;

3310 if (SuccProb > BestProb)

3311 BestProb = SuccProb;

3312 }

3313

3315 if (BBProb <= BestProb)

3316 return false;

3317

3318

3319

3320 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);

3321 BlockFrequency Gain = PredFreq * (BBProb - BestProb);

3322 return Gain > scaleThreshold(BB);

3323}

3324

3325

3326

3327void MachineBlockPlacement::findDuplicateCandidates(

3329 BlockFilterSet *BlockFilter) {

3335

3336

3339 };

3341 return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B);

3342 };

3345

3346 auto SuccIt = Succs.begin();

3347 if (SuccIt != Succs.end()) {

3349 }

3350

3351

3352

3353

3354

3355

3356

3357

3358

3359

3360

3361

3362

3363

3364

3365

3366

3367

3368

3369

3370

3371

3372

3373

3374

3375

3376

3377

3378

3379

3380

3381

3382

3383

3384

3385

3386

3387

3388

3389

3390

3391

3393 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);

3394

3396

3397

3398 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {

3399 Fallthrough = Pred;

3400 if (SuccIt != Succs.end())

3401 SuccIt++;

3402 }

3403 continue;

3404 }

3405

3406 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;

3408 if (SuccIt == Succs.end()) {

3409

3410 if (Succs.size() > 0)

3411 DupCost += PredFreq;

3412 } else {

3413

3414 DupCost += PredFreq;

3416 }

3417

3418 assert(OrigCost >= DupCost);

3419 OrigCost -= DupCost;

3420 if (OrigCost > BBDupThreshold) {

3422 if (SuccIt != Succs.end())

3423 SuccIt++;

3424 }

3425 }

3426

3427

3428

3429 if (!Fallthrough) {

3430 if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {

3431 Candidates[0] = Candidates.back();

3433 }

3434 }

3435}

3436

3437void MachineBlockPlacement::initTailDupThreshold() {

3439 if (F->getFunction().hasProfileData()) {

3440

3443 UseProfileCount = true;

3444 DupThreshold =

3446 } else {

3447

3451 if (Freq > MaxFreq)

3452 MaxFreq = Freq;

3453 }

3454

3456 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);

3457 UseProfileCount = false;

3458 }

3459 }

3460

3462

3466

3467

3468

3469 if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) {

3470

3471

3472

3476 }

3477

3478

3479

3481 (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive ||

3483 TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());

3484}

3485

3486bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {

3488 return false;

3489

3490

3491 if (std::next(MF.begin()) == MF.end())

3492 return false;

3493

3494 F = &MF;

3495 MBPI = &getAnalysis().getMBPI();

3496 MBFI = std::make_unique(

3497 getAnalysis().getMBFI());

3498 MLI = &getAnalysis().getLI();

3501 MPDT = nullptr;

3502 PSI = &getAnalysis().getPSI();

3503 PassConfig = &getAnalysis();

3504

3505

3506

3507 PreferredLoopExit = nullptr;

3508

3510 "BlockToChain map should be empty before starting placement.");

3512 "Computed Edge map should be empty before starting placement.");

3513

3514

3515 initTailDupThreshold();

3516

3517 const bool OptForSize =

3519

3520

3521

3522 bool UseExtTspForPerf = false;

3523 bool UseExtTspForSize = false;

3525 UseExtTspForPerf =

3529 }

3530

3531

3532 if (allowTailDupPlacement()) {

3533 MPDT = &getAnalysis().getPostDomTree();

3534 if (OptForSize)

3535 TailDupSize = 1;

3536 const bool PreRegAlloc = false;

3537 TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,

3538 true, TailDupSize);

3539 if (!UseExtTspForSize)

3540 precomputeTriangleChains();

3541 }

3542

3543

3544 if (!UseExtTspForSize)

3545 buildCFGChains();

3546

3547

3548

3549

3553

3554 if (EnableTailMerge) {

3556 BranchFolder BF(true, false,

3558

3560 true)) {

3561

3562 if (MPDT)

3564 if (!UseExtTspForSize) {

3565

3566 BlockToChain.clear();

3567 ComputedEdges.clear();

3569 buildCFGChains();

3570 }

3571 }

3572 }

3573

3574

3575

3576

3577 if (UseExtTspForPerf || UseExtTspForSize) {

3579 !(UseExtTspForPerf && UseExtTspForSize) &&

3580 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");

3581 applyExtTsp(UseExtTspForSize);

3582 createCFGChainExtTsp();

3583 }

3584

3585 optimizeBranches();

3586 alignBlocks();

3587

3588 BlockToChain.clear();

3589 ComputedEdges.clear();

3591

3592

3598 MBFI->view("MBP." + MF.getName(), false);

3599 }

3600

3601

3602

3603 return true;

3604}

3605

3606void MachineBlockPlacement::applyExtTsp(bool OptForSize) {

3607

3609 BlockIndex.reserve(F->size());

3610 std::vector<const MachineBasicBlock *> CurrentBlockOrder;

3611 CurrentBlockOrder.reserve(F->size());

3612 size_t NumBlocks = 0;

3614 BlockIndex[&MBB] = NumBlocks++;

3615 CurrentBlockOrder.push_back(&MBB);

3616 }

3617

3624

3626 BlockCounts[BlockIndex[&MBB]] = OptForSize ? 1 : BlockFreq.getFrequency();

3627

3628

3629

3630

3631

3632

3633 auto NonDbgInsts =

3635 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());

3636 BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;

3637

3638

3639 if (OptForSize) {

3640 Cond.clear();

3643 continue;

3644

3646

3647

3648

3650 if (TBB && TBB != FTB)

3652 if (FBB && FBB != FTB)

3654 if (FTB)

3656

3657

3658

3659 const uint64_t Freq = Succs.size() == 1 ? 110 : 100;

3661 JumpCounts.push_back({BlockIndex[&MBB], BlockIndex[Succ], Freq});

3662 } else {

3667 {BlockIndex[&MBB], BlockIndex[Succ], JumpFreq.getFrequency()});

3668 }

3669 }

3670 }

3671

3672 LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()

3673 << " with profile = " << F->getFunction().hasProfileData()

3674 << " (" << F->getName() << ")" << "\n");

3675

3676 const double OrgScore = calcExtTspScore(BlockSizes, JumpCounts);

3678

3679

3680 auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);

3681 std::vector<const MachineBasicBlock *> NewBlockOrder;

3682 NewBlockOrder.reserve(F->size());

3684 NewBlockOrder.push_back(CurrentBlockOrder[Node]);

3685 }

3686 const double OptScore = calcExtTspScore(NewOrder, BlockSizes, JumpCounts);

3688

3689

3690 if (OptForSize && OrgScore > OptScore)

3691 assignBlockOrder(CurrentBlockOrder);

3692 else

3693 assignBlockOrder(NewBlockOrder);

3694}

3695

3696void MachineBlockPlacement::assignBlockOrder(

3697 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {

3698 assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");

3699 F->RenumberBlocks();

3700

3701

3702

3703

3704 MPDT = nullptr;

3705

3706 bool HasChanges = false;

3707 for (size_t I = 0; I < NewBlockOrder.size(); I++) {

3708 if (NewBlockOrder[I] != F->getBlockNumbered(I)) {

3709 HasChanges = true;

3710 break;

3711 }

3712 }

3713

3714 if (!HasChanges)

3715 return;

3716

3718 for (auto &MBB : *F) {

3720 }

3721

3722

3725 NewIndex[MBB] = NewIndex.size();

3726 }

3728 return NewIndex[&L] < NewIndex[&R];

3729 });

3730

3731

3732

3735 for (auto &MBB : *F) {

3738 auto *FTMBB = PrevFallThroughs[MBB.getNumber()];

3739

3740

3741

3742 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {

3744 }

3745

3746

3747 Cond.clear();

3750 continue;

3752 }

3753}

3754

3755void MachineBlockPlacement::createCFGChainExtTsp() {

3756 BlockToChain.clear();

3757 ComputedEdges.clear();

3759

3761 BlockChain *FunctionChain =

3762 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);

3763

3765 if (HeadBB == &MBB)

3766 continue;

3767 FunctionChain->merge(&MBB, nullptr);

3768 }

3769}

3770

3771namespace {

3772

3773

3774

3775

3776

3777

3778

3780

3782

3783

3785

3786public:

3787 static char ID;

3788

3791 }

3792

3794

3800 }

3801};

3802

3803}

3804

3805char MachineBlockPlacementStats::ID = 0;

3806

3808

3810 "Basic Block Placement Stats", false, false)

3815

3816bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {

3817

3818 if (std::next(F.begin()) == F.end())

3819 return false;

3820

3822 return false;

3823

3824 MBPI = &getAnalysis().getMBPI();

3825 MBFI = &getAnalysis().getMBFI();

3826

3830 (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;

3832 (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;

3834

3836 continue;

3837

3840 ++NumBranches;

3842 }

3843 }

3844

3845 return false;

3846}

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

This file defines the BumpPtrAllocator interface.

static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Declares methods and data structures for code layout algorithms.

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

static unsigned InstrCount

This file defines the DenseMap class.

DenseMap< Block *, BlockRelaxAux > Blocks

static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

const HexagonInstrInfo * TII

static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)

block placement Basic Block Placement Stats

static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)

The helper function returns the branch probability that is adjusted or normalized over the new total ...

static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)

Branch Probability Basic Block Placement

static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)

static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)

static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)

static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)

static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)

static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)

static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)

static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)

static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)

static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)

Compare 2 BlockFrequency's with a small penalty for A.

static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)

static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)

static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)

static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)

static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)

Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)

Helper to print the name of a MBB.

static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))

static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)

Check if BB has exactly the successors in Successors.

static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)

static uint64_t countMBBInstruction(MachineBasicBlock *MBB)

static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)

static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)

static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)

static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)

static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB

const SmallVectorImpl< MachineOperand > & Cond

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This file describes how to lower LLVM code to machine code.

Target-Independent Code Generator Pass Configuration Options pass.

unify loop Fixup each natural loop to have a single exit block

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

static BlockFrequency max()

Returns the maximum possible frequency, the saturation value.

uint64_t getFrequency() const

Returns the frequency as a fixpoint number scaled by the entry frequency.

static BranchProbability getOne()

uint32_t getNumerator() const

BranchProbability getCompl() const

static BranchProbability getZero()

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool erase(const KeyT &Val)

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

void reserve(size_type NumEntries)

Grow the densemap so that it can contain at least NumEntries items before resizing again.

bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const

dominates - Returns true iff A dominates B.

void recalculate(ParentType &Func)

recalculate - compute a dominator tree for the given function

bool hasProfileData(bool IncludeSynthetic=false) const

Return true if the function is annotated with profile data.

unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override

Remove the branching code at the end of the specific MBB.

bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override

Reverses the branch condition of the specified condition list, returning false on success and true if...

unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override

Insert branch code into the end of the specified MachineBasicBlock.

LoopT * getLoopFor(const BlockT *BB) const

Return the inner most loop that BB lives in.

const MDOperand & getOperand(unsigned I) const

ArrayRef< MDOperand > operands() const

unsigned getNumOperands() const

Return number of MDNode operands.

Tracking metadata reference owned by Metadata.

StringRef getString() const

unsigned pred_size() const

bool isEHPad() const

Returns true if the block is a landing pad.

instr_iterator instr_begin()

MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)

Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)

Update the terminator instructions in block to account for changes to block layout which may have bee...

bool canFallThrough()

Return true if the block can implicitly transfer control to the block after it by falling off the end...

succ_iterator succ_begin()

unsigned succ_size() const

void setAlignment(Align A)

Set alignment of the basic block.

bool isEntryBlock() const

Returns true if this is the entry block of the function.

pred_iterator pred_begin()

succ_reverse_iterator succ_rbegin()

bool isLayoutSuccessor(const MachineBasicBlock *MBB) const

Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...

instr_iterator instr_end()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

instr_iterator erase(instr_iterator I)

Remove an instruction from the instruction list and delete it.

DebugLoc findBranchDebugLoc()

Find and return the merged DebugLoc of the branch instructions of the block.

iterator_range< succ_iterator > successors()

bool isSuccessor(const MachineBasicBlock *MBB) const

Return true if the specified MBB is a successor of this block.

iterator_range< pred_iterator > predecessors()

StringRef getName() const

Return the name of the corresponding LLVM basic block, or an empty string.

MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...

BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const

getblockFreq - Return block frequency.

BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

Function & getFunction()

Return the LLVM function that this machine code represents.

void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)

RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

Representation of each machine instruction.

MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...

MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.

Analysis providing profile information.

uint64_t getOrCompHotCountThreshold() const

Returns HotCountThreshold if set.

typename vector_type::const_iterator iterator

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

typename SuperClass::const_iterator const_iterator

typename SuperClass::iterator iterator

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

A BumpPtrAllocator that allows only elements of a specific type to be allocated.

T * Allocate(size_t num=1)

Allocate space for an array of objects without constructing them.

void DestroyAll()

Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...

Utility class to perform tail duplication.

void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)

Prepare to run on a specific machine function.

bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)

Tail duplicate a single basic block into its predecessors, and then clean up.

static bool isSimpleBB(MachineBasicBlock *TailBB)

True if this BB has only one unconditional jump.

bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)

Returns true if TailBB can successfully be duplicated into PredBB.

bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)

Determine if it is profitable to duplicate this block.

TargetInstrInfo - Interface to description of machine instruction set.

This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...

virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const

Return the maximum amount of bytes allowed to be emitted when padding for alignment.

virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const

Return the preferred loop alignment.

virtual bool alignLoopsWithOptSize() const

Should loops be aligned even when the function is marked OptSize (but not MinSize).

bool requiresStructuredCFG() const

Target-Independent Code Generator Pass Configuration Options.

bool getEnableTailMerge() const

CodeGenOptLevel getOptLevel() const

virtual const TargetRegisterInfo * getRegisterInfo() const

getRegisterInfo - If register information is available, return it.

virtual const TargetInstrInfo * getInstrInfo() const

virtual const TargetLowering * getTargetLowering() const

An efficient, type-erasing, non-owning reference to a callable.

self_iterator getIterator()

A raw_ostream that writes to an std::string.

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)

Estimate the "quality" of a given node order in CFG.

std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)

Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...

void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")

Append to path.

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

void stable_sort(R &&Range)

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

cl::opt< bool > ApplyExtTspWithoutProfile

bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)

Returns true if machine function MF is suggested to be size-optimized based on the profile.

void initializeMachineBlockPlacementStatsPass(PassRegistry &)

cl::opt< unsigned > ProfileLikelyProb

void erase(Container &C, ValueType V)

Wrapper function to remove a value from a container:

cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))

auto reverse(ContainerTy &&C)

cl::opt< unsigned > StaticLikelyProb

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))

bool isFunctionInPrintList(StringRef FunctionName)

auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)

Construct a range iterator which begins at It and moves forwards until End is reached,...

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)

Print the block frequency Freq relative to the current functions entry frequency.

char & MachineBlockPlacementID

MachineBlockPlacement - This pass places basic blocks based on branch probabilities.

cl::opt< bool > EnableExtTspBlockPlacement

char & MachineBlockPlacementStatsID

MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...

void initializeMachineBlockPlacementPass(PassRegistry &)

Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

This struct is a compact representation of a valid (non-zero power of two) alignment.