LLVM: lib/CodeGen/MachineBlockPlacement.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
66#include
67#include
68#include
69#include
70#include
71#include
72#include
73#include
74#include
75
76using namespace llvm;
77
78#define DEBUG_TYPE "block-placement"
79
80STATISTIC(NumCondBranches, "Number of conditional branches");
81STATISTIC(NumUncondBranches, "Number of unconditional branches");
83 "Potential frequency of taking conditional branches");
85 "Potential frequency of taking unconditional branches");
86
88 "align-all-blocks",
89 cl::desc("Force the alignment of all blocks in the function in log2 format "
90 "(e.g 4 means align on 16B boundaries)."),
92
94 "align-all-nofallthru-blocks",
95 cl::desc("Force the alignment of all blocks that have no fall-through "
96 "predecessors (i.e. don't add nops that are executed). In log2 "
97 "format (e.g 4 means align on 16B boundaries)."),
99
101 "max-bytes-for-alignment",
102 cl::desc("Forces the maximum bytes allowed to be emitted when padding for "
103 "alignment"),
105
106
108 "block-placement-exit-block-bias",
109 cl::desc("Block frequency percentage a loop exit block needs "
110 "over the original exit to be considered the new exit."),
112
113
114
115
117 "loop-to-cold-block-ratio",
118 cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
119 "(frequency of block) is greater than this ratio"),
121
124 cl::desc("Force outlining cold blocks from loops."),
126
129 cl::desc("Model the cost of loop rotation more "
130 "precisely by using profile data."),
132
135 cl::desc("Force the use of precise cost "
136 "loop rotation strategy."),
138
140 "misfetch-cost",
141 cl::desc("Cost that models the probabilistic risk of an instruction "
142 "misfetch due to a jump comparing to falling through, whose cost "
143 "is zero."),
145
147 cl::desc("Cost of jump instructions."),
151 cl::desc("Perform tail duplication during placement. "
152 "Creates more fallthrough opportunities in "
153 "outline branches."),
155
158 cl::desc("Perform branch folding during placement. "
159 "Reduces code size."),
161
162
164 "tail-dup-placement-threshold",
165 cl::desc("Instruction cutoff for tail duplication during layout. "
166 "Tail merging during layout is forced to have a threshold "
167 "that won't conflict."),
169
170
172 "tail-dup-placement-aggressive-threshold",
173 cl::desc("Instruction cutoff for aggressive tail duplication during "
174 "layout. Used at -O3. Tail merging during layout is forced to "
175 "have a threshold that won't conflict."),
177
178
180 "tail-dup-placement-penalty",
182 "Cost penalty for blocks that can avoid breaking CFG by copying. "
183 "Copying can increase fallthrough, but it also increases icache "
184 "pressure. This parameter controls the penalty to account for that. "
185 "Percent as integer."),
187
188
190 "tail-dup-profile-percent-threshold",
191 cl::desc("If profile count information is used in tail duplication cost "
192 "model, the gained fall through number from tail duplication "
193 "should be at least this percent of hot count."),
195
196
198 "triangle-chain-count",
199 cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
200 "triangle tail duplication heuristic to kick in. 0 to disable."),
202
203
204
205
206
207
209 "renumber-blocks-before-view",
211 "If true, basic blocks are re-numbered before MBP layout is printed "
212 "into a dot graph. Only used when a function is being printed."),
214
216 "ext-tsp-block-placement-max-blocks",
217 cl::desc("Maximum number of basic blocks in a function to run ext-TSP "
218 "block placement."),
220
221
224 cl::desc("Use ext-tsp for size-aware block placement."));
225
226namespace llvm {
231
232
233
234
236
237
238
240}
241
242namespace {
243
244class BlockChain;
245
246
248
249
250
251
252
253
254
255
256
257
258
259
260class BlockChain {
261
262
263
264
266
267
268
269
270
271
272
273 BlockToChainMapType &BlockToChain;
274
275public:
276
277
278
279
280
281 BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
282 : Blocks(1, BB), BlockToChain(BlockToChain) {
283 assert(BB && "Cannot create a chain with a null basic block");
284 BlockToChain[BB] = this;
285 }
286
287
290
291
292 iterator begin() { return Blocks.begin(); }
294
295
296 iterator end() { return Blocks.end(); }
298
300 for (iterator i = begin(); i != end(); ++i) {
301 if (*i == BB) {
303 return true;
304 }
305 }
306 return false;
307 }
308
309
310
311
312
313
314
316 assert(BB && "Can't merge a null block.");
317 assert(.empty() && "Can't merge into an empty chain.");
318
319
320 if (!Chain) {
321 assert(!BlockToChain[BB] &&
322 "Passed chain is null, but BB has entry in BlockToChain.");
323 Blocks.push_back(BB);
324 BlockToChain[BB] = this;
325 return;
326 }
327
328 assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");
329 assert(Chain->begin() != Chain->end());
330
331
332
334 Blocks.push_back(ChainBB);
335 assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");
336 BlockToChain[ChainBB] = this;
337 }
338 }
339
340#ifndef NDEBUG
341
345 }
346#endif
347
348
349
350
351
352
353
354
355
356
357 unsigned UnscheduledPredecessors = 0;
358};
359
361
363
364
365 struct BlockAndTailDupResult {
367 bool ShouldTailDup;
368 };
369
370
371 struct WeightedEdge {
375 };
376
377
380
381
383
384
386
387
389
390
391 std::unique_ptr MBFI;
392
393
395
396
397
398
400
401
403
404
406
407
409
411
413
414
415
416
417
418
420
421
423
424 unsigned TailDupSize;
425
426
427
428 bool UseProfileCount = false;
429
430
431
432
433
434
435
436
438
439
440
441
442
443
444
446
447#ifndef NDEBUG
448
449
450
451
453#endif
454
455
456
458 if (UseProfileCount) {
459 auto Count = MBFI->getBlockProfileCount(BB);
460 if (Count)
462 else
464 } else
465 return MBFI->getBlockFreq(BB);
466 }
467
468
470 void initTailDupThreshold();
471
472
473
474 void markChainSuccessors(const BlockChain &Chain,
476 const BlockFilterSet *BlockFilter = nullptr);
477
478
479
480 void markBlockSuccessors(const BlockChain &Chain, const MachineBasicBlock *BB,
482 const BlockFilterSet *BlockFilter = nullptr);
483
485 collectViableSuccessors(const MachineBasicBlock *BB, const BlockChain &Chain,
486 const BlockFilterSet *BlockFilter,
489 BlockFilterSet *BlockFilter);
492 BlockFilterSet *BlockFilter);
493 bool repeatedlyTailDuplicateBlock(
496 BlockFilterSet *BlockFilter,
499 bool
501 BlockChain &Chain, BlockFilterSet *BlockFilter,
504 bool &DuplicatedToLPred);
507 const BlockChain &SuccChain,
510 const BlockChain &Chain,
511 const BlockFilterSet *BlockFilter);
512 BlockAndTailDupResult selectBestSuccessor(const MachineBasicBlock *BB,
513 const BlockChain &Chain,
514 const BlockFilterSet *BlockFilter);
516 selectBestCandidateBlock(const BlockChain &Chain,
519 getFirstUnplacedBlock(const BlockChain &PlacedChain,
522 getFirstUnplacedBlock(const BlockChain &PlacedChain,
524 const BlockFilterSet *BlockFilter);
525
526
527
528
529
530
533 const BlockFilterSet *BlockFilter);
534
536 BlockFilterSet *BlockFilter = nullptr);
537 bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
540 const BlockFilterSet &LoopBlockSet);
542 const BlockFilterSet &LoopBlockSet);
546 const BlockFilterSet &LoopBlockSet);
549 const BlockFilterSet &LoopBlockSet);
551 const BlockFilterSet &LoopBlockSet);
553 const BlockFilterSet &LoopBlockSet,
555 BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
556 void buildLoopChains(const MachineLoop &L);
557 void rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
558 BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
559 void rotateLoopWithProfile(BlockChain &LoopChain, const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 void buildCFGChains();
562 void optimizeBranches();
563 void alignBlocks();
564
565
567
568
572 const BlockFilterSet *BlockFilter);
573
574
577 const BlockChain &Chain, const BlockFilterSet *BlockFilter);
578
579
580 BlockAndTailDupResult getBestTrellisSuccessor(
584 const BlockFilterSet *BlockFilter);
585
586
587 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
590
591
592
595 const BlockChain &Chain,
596 const BlockFilterSet *BlockFilter);
597
598
599
600 void precomputeTriangleChains();
601
602
603 void applyExtTsp(bool OptForSize);
604
605
606 void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);
607
608
609 void createCFGChainExtTsp();
610
611public:
612 static char ID;
613
616 }
617
619
620 bool allowTailDupPlacement() const {
622 return TailDupPlacement && ->getTarget().requiresStructuredCFG();
623 }
624
634 }
635};
636
637}
638
639char MachineBlockPlacement::ID = 0;
640
642
644 "Branch Probability Basic Block Placement", false, false)
652
653#ifndef NDEBUG
654
655
656
658 std::string Result;
661 OS << " ('" << BB->getName() << "')";
663 return Result;
664}
665#endif
666
667
668
669
670
671
672
673void MachineBlockPlacement::markChainSuccessors(
675 const BlockFilterSet *BlockFilter) {
676
677
679 markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
680 }
681}
682
683
684
685
686
687
688
689void MachineBlockPlacement::markBlockSuccessors(
691 const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {
692
693
694
695
697 if (BlockFilter && !BlockFilter->count(Succ))
698 continue;
699 BlockChain &SuccChain = *BlockToChain[Succ];
700
701 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
702 continue;
703
704
705
706 if (SuccChain.UnscheduledPredecessors == 0 ||
707 --SuccChain.UnscheduledPredecessors > 0)
708 continue;
709
710 auto *NewBB = *SuccChain.begin();
711 if (NewBB->isEHPad())
713 else
715 }
716}
717
718
719
720
721
724 const BlockFilterSet *BlockFilter,
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
744 bool SkipSucc = false;
745 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
746 SkipSucc = true;
747 } else {
748 BlockChain *SuccChain = BlockToChain[Succ];
749 if (SuccChain == &Chain) {
750 SkipSucc = true;
751 } else if (Succ != *SuccChain->begin()) {
753 << " -> Mid chain!\n");
754 continue;
755 }
756 }
757 if (SkipSucc)
759 else
761 }
762
763 return AdjustedSumProb;
764}
765
766
767
774 if (SuccProbN >= SuccProbD)
776 else
778
779 return SuccProb;
780}
781
782
783static bool
787 return false;
788
789 if (Successors.count(&BB))
790 return false;
792 if (!Successors.count(Succ))
793 return false;
794 return true;
795}
796
797
798
799
800bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
801
802
803
804 bool IsSimple = TailDup.isSimpleBB(BB);
805
807 return false;
809}
810
811
812
813
814
815
816
821 return (Gain / ThresholdProb) >= EntryFreq;
822}
823
824
825
826
827
828
829bool MachineBlockPlacement::isProfitableToTailDup(
832 const BlockFilterSet *BlockFilter) {
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
858
859 auto AdjustedSuccSumProb =
860 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
862 auto BBFreq = MBFI->getBlockFreq(BB);
863 auto SuccFreq = MBFI->getBlockFreq(Succ);
867
868
869 if (SuccSuccs.size() == 0)
871
873
876 if (Prob > BestSuccSucc)
877 BestSuccSucc = Prob;
878 if (PDom == nullptr)
879 if (MPDT->dominates(SuccSucc, Succ)) {
880 PDom = SuccSucc;
881 break;
882 }
883 }
884
885
888 if (SuccPred == Succ || SuccPred == BB ||
889 BlockToChain[SuccPred] == &Chain ||
890 (BlockFilter && !BlockFilter->count(SuccPred)))
891 continue;
892 auto Freq =
893 MBFI->getBlockFreq(SuccPred) * MBPI->getEdgeProbability(SuccPred, Succ);
894 if (Freq > SuccBestPred)
895 SuccBestPred = Freq;
896 }
897
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918 if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
925 BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
927 }
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962 if (UProb > AdjustedSuccSumProb / 2 &&
963 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
964 Chain, BlockFilter))
965
967 (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
968 EntryFreq);
969
971 (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
972 std::max(Qin, F) * UProb),
973 EntryFreq);
974}
975
976
977
978
979
980
981
982
983bool MachineBlockPlacement::isTrellis(
986 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
987
988
989 if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
990 return false;
991
994
996
998 int PredCount = 0;
999 for (auto *SuccPred : Succ->predecessors()) {
1000
1001 if (Successors.count(SuccPred)) {
1002
1004 if (!Successors.count(CheckSucc))
1005 return false;
1006 continue;
1007 }
1008 const BlockChain *PredChain = BlockToChain[SuccPred];
1009 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1010 PredChain == &Chain || PredChain == BlockToChain[Succ])
1011 continue;
1012 ++PredCount;
1013
1014 if (!SeenPreds.insert(SuccPred).second)
1015 continue;
1017 return false;
1018 }
1019
1020
1021 if (PredCount < 1)
1022 return false;
1023 }
1024 return true;
1025}
1026
1027
1028
1029
1030
1031
1032std::pair<MachineBlockPlacement::WeightedEdge,
1033 MachineBlockPlacement::WeightedEdge>
1034MachineBlockPlacement::getBestNonConflictingEdges(
1037 Edges) {
1038
1039
1040
1041
1042
1043
1044
1045 auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
1046
1049 auto BestA = Edges[0].begin();
1050 auto BestB = Edges[1].begin();
1051
1052
1053 if (BestA->Src == BestB->Src) {
1054
1055 auto SecondBestA = std::next(BestA);
1056 auto SecondBestB = std::next(BestB);
1057 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
1058 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
1059 if (BestAScore < BestBScore)
1060 BestA = SecondBestA;
1061 else
1062 BestB = SecondBestB;
1063 }
1064
1065 if (BestB->Src == BB)
1067 return std::make_pair(*BestA, *BestB);
1068}
1069
1070
1071
1072
1073
1074
1075
1076
1077MachineBlockPlacement::BlockAndTailDupResult
1078MachineBlockPlacement::getBestTrellisSuccessor(
1082 const BlockFilterSet *BlockFilter) {
1083
1084 BlockAndTailDupResult Result = {nullptr, false};
1087
1088
1089
1090
1091 if (Successors.size() != 2 || ViableSuccs.size() != 2)
1093
1094
1096 int SuccIndex = 0;
1097 for (auto *Succ : ViableSuccs) {
1099
1100 if (SuccPred != BB)
1101 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1102 BlockToChain[SuccPred] == &Chain ||
1103 BlockToChain[SuccPred] == BlockToChain[Succ])
1104 continue;
1105 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
1107 Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
1108 }
1109 ++SuccIndex;
1110 }
1111
1112
1113 WeightedEdge BestA, BestB;
1114 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1115
1116 if (BestA.Src != BB) {
1117
1118
1119
1120 LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
1122 }
1123
1124
1125
1126
1127 if (BestA.Dest == BestB.Src) {
1128
1129
1132
1133 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1134 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1135 isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
1136 Chain, BlockFilter)) {
1140 << ", probability: " << Succ2Prob
1141 << " (Tail Duplicate)\n");
1143 Result.ShouldTailDup = true;
1145 }
1146 }
1147
1148
1149 ComputedEdges[BestB.Src] = {BestB.Dest, false};
1150
1151 auto TrellisSucc = BestA.Dest;
1155 << ", probability: " << SuccProb << " (Trellis)\n");
1156 Result.BB = TrellisSucc;
1158}
1159
1160
1161
1162
1163bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1165 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
1166 if (!shouldTailDuplicate(Succ))
1167 return false;
1168
1169
1170 bool Duplicate = true;
1171
1172 unsigned int NumDup = 0;
1173
1174
1178
1179
1180
1181 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1182 (BlockToChain[Pred] == &Chain && !Succ->succ_empty()))
1183 continue;
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219 continue;
1220 Duplicate = false;
1221 continue;
1222 }
1223 NumDup++;
1224 }
1225
1226
1227 if (NumDup == 0)
1228 return false;
1229
1230
1231
1232 if (F->getFunction().hasProfileData())
1233 return true;
1234
1235
1236
1237
1238
1240 return true;
1241
1242
1243 NumDup++;
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263 if ((NumDup > Succ->succ_size()) || !Duplicate)
1264 return false;
1265
1266 return true;
1267}
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286void MachineBlockPlacement::precomputeTriangleChains() {
1287 struct TriangleChain {
1288 std::vector<MachineBasicBlock *> Edges;
1289
1291 : Edges({src, dst}) {}
1292
1294 assert(getKey()->isSuccessor(dst) &&
1295 "Attempting to append a block that is not a successor.");
1296 Edges.push_back(dst);
1297 }
1298
1299 unsigned count() const { return Edges.size() - 1; }
1300
1302 };
1303
1305 return;
1306
1307 LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");
1308
1309
1312
1314 continue;
1317 if (!MPDT->dominates(Succ, &BB))
1318 continue;
1319 PDom = Succ;
1320 break;
1321 }
1322
1323
1324 if (PDom == nullptr)
1325 continue;
1326
1328 continue;
1329
1330
1331 if (!shouldTailDuplicate(PDom))
1332 continue;
1333 bool CanTailDuplicate = true;
1334
1335
1337 if (Pred == &BB)
1338 continue;
1340 CanTailDuplicate = false;
1341 break;
1342 }
1343 }
1344
1345
1346 if (!CanTailDuplicate)
1347 continue;
1348
1349
1350
1351
1352
1353 auto Found = TriangleChainMap.find(&BB);
1354
1355
1356 if (Found != TriangleChainMap.end()) {
1357 TriangleChain Chain = std::move(Found->second);
1358 TriangleChainMap.erase(Found);
1359 Chain.append(PDom);
1360 TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1361 } else {
1362 auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
1363 assert(InsertResult.second && "Block seen twice.");
1364 (void)InsertResult;
1365 }
1366 }
1367
1368
1369
1370
1371 for (auto &ChainPair : TriangleChainMap) {
1372 TriangleChain &Chain = ChainPair.second;
1373
1374
1375
1377 continue;
1379 Chain.Edges.pop_back();
1383 << " as pre-computed based on triangles.\n");
1384
1385 auto InsertResult = ComputedEdges.insert({src, {dst, true}});
1386 assert(InsertResult.second && "Block seen twice.");
1387 (void)InsertResult;
1388
1389 dst = src;
1390 }
1391 }
1392}
1393
1394
1395
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1415 }
1416 }
1418}
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1432 const BlockFilterSet *BlockFilter) {
1433
1434
1435 if (SuccChain.UnscheduledPredecessors == 0)
1436 return false;
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1552
1553
1554
1555 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1556 bool BadCFGConflict = false;
1557
1559 BlockChain *PredChain = BlockToChain[Pred];
1560 if (Pred == Succ || PredChain == &SuccChain ||
1561 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1562 Pred != *std::prev(PredChain->end()) ||
1563
1564
1565
1566 (Pred == BB))
1567 continue;
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1583 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
1584 BadCFGConflict = true;
1585 break;
1586 }
1587 }
1588
1589 if (BadCFGConflict) {
1591 << SuccProb << " (prob) (non-cold CFG conflict)\n");
1592 return true;
1593 }
1594
1595 return false;
1596}
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608MachineBlockPlacement::BlockAndTailDupResult
1609MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,
1610 const BlockChain &Chain,
1611 const BlockFilterSet *BlockFilter) {
1613
1614 BlockAndTailDupResult BestSucc = {nullptr, false};
1616
1618 auto AdjustedSumProb =
1619 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1620
1622 << "\n");
1623
1624
1625
1626 auto FoundEdge = ComputedEdges.find(BB);
1627 if (FoundEdge != ComputedEdges.end()) {
1629 ComputedEdges.erase(FoundEdge);
1630 BlockChain *SuccChain = BlockToChain[Succ];
1631 if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1632 SuccChain != &Chain && Succ == *SuccChain->begin())
1633 return FoundEdge->second;
1634 }
1635
1636
1637
1638 if (isTrellis(BB, Successors, Chain, BlockFilter))
1639 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1640 BlockFilter);
1641
1642
1643
1644
1646 DupCandidates;
1651
1652 BlockChain &SuccChain = *BlockToChain[Succ];
1653
1654
1655 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1656 Chain, BlockFilter)) {
1657
1658 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1660 continue;
1661 }
1662
1665 << ", probability: " << SuccProb
1666 << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
1667 << "\n");
1668
1669 if (BestSucc.BB && BestProb >= SuccProb) {
1670 LLVM_DEBUG(dbgs() << " Not the best candidate, continuing\n");
1671 continue;
1672 }
1673
1674 LLVM_DEBUG(dbgs() << " Setting it as best candidate\n");
1675 BestSucc.BB = Succ;
1676 BestProb = SuccProb;
1677 }
1678
1679
1680
1681
1682
1684 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1685 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1686 return std::get<0>(L) > std::get<0>(R);
1687 });
1688 for (auto &Tup : DupCandidates) {
1691 std::tie(DupProb, Succ) = Tup;
1692 if (DupProb < BestProb)
1693 break;
1694 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1695 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1697 << ", probability: " << DupProb
1698 << " (Tail Duplicate)\n");
1699 BestSucc.BB = Succ;
1700 BestSucc.ShouldTailDup = true;
1701 break;
1702 }
1703 }
1704
1705 if (BestSucc.BB)
1707
1708 return BestSucc;
1709}
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
1723
1724
1725
1726
1728 return BlockToChain.lookup(BB) == &Chain;
1729 });
1730
1731 if (WorkList.empty())
1732 return nullptr;
1733
1734 bool IsEHPad = WorkList[0]->isEHPad();
1735
1740 "EHPad mismatch between block and work list.");
1741
1742 BlockChain &SuccChain = *BlockToChain[MBB];
1743 if (&SuccChain == &Chain)
1744 continue;
1745
1746 assert(SuccChain.UnscheduledPredecessors == 0 &&
1747 "Found CFG-violating block");
1748
1752 << " (freq)\n");
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1773 continue;
1774
1775 BestBlock = MBB;
1776 BestFreq = CandidateFreq;
1777 }
1778
1779 return BestBlock;
1780}
1781
1782
1783
1784
1785
1786
1787
1788
1790 const BlockChain &PlacedChain,
1792
1794 ++I) {
1795 if (BlockToChain[&*I] != &PlacedChain) {
1796 PrevUnplacedBlockIt = I;
1797
1798
1799
1800 return *BlockToChain[&*I]->begin();
1801 }
1802 }
1803 return nullptr;
1804}
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1817 const BlockChain &PlacedChain,
1818 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1819 const BlockFilterSet *BlockFilter) {
1820 assert(BlockFilter);
1821 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1822 ++PrevUnplacedBlockInFilterIt) {
1823 BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1824 if (C != &PlacedChain) {
1825 return *C->begin();
1826 }
1827 }
1828 return nullptr;
1829}
1830
1831void MachineBlockPlacement::fillWorkLists(
1833 const BlockFilterSet *BlockFilter = nullptr) {
1834 BlockChain &Chain = *BlockToChain[MBB];
1835 if (!UpdatedPreds.insert(&Chain).second)
1836 return;
1837
1839 Chain.UnscheduledPredecessors == 0 &&
1840 "Attempting to place block with unscheduled predecessors in worklist.");
1842 assert(BlockToChain[ChainBB] == &Chain &&
1843 "Block in chain doesn't match BlockToChain map.");
1845 if (BlockFilter && !BlockFilter->count(Pred))
1846 continue;
1847 if (BlockToChain[Pred] == &Chain)
1848 continue;
1849 ++Chain.UnscheduledPredecessors;
1850 }
1851 }
1852
1853 if (Chain.UnscheduledPredecessors != 0)
1854 return;
1855
1859 else
1861}
1862
1863void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
1864 BlockChain &Chain,
1865 BlockFilterSet *BlockFilter) {
1866 assert(HeadBB && "BB must not be null.\n");
1867 assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
1869 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1870 if (BlockFilter)
1871 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1872
1874 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1876 while (true) {
1877 assert(BB && "null block found at end of chain in loop.");
1878 assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
1879 assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
1880
1881
1882
1883 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1885 bool ShouldTailDup = Result.ShouldTailDup;
1886 if (allowTailDupPlacement())
1887 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1888 BB, BestSucc, Chain, BlockFilter));
1889
1890
1891
1892
1893 if (!BestSucc)
1894 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1895 if (!BestSucc)
1896 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1897
1898 if (!BestSucc) {
1899 if (BlockFilter)
1900 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1901 BlockFilter);
1902 else
1903 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1904 if (!BestSucc)
1905 break;
1906
1907 LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
1908 "layout successor until the CFG reduces\n");
1909 }
1910
1911
1912
1913 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1914 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1915 BlockFilter, PrevUnplacedBlockIt,
1916 PrevUnplacedBlockInFilterIt);
1917
1918
1920 continue;
1921 }
1922
1923
1924 BlockChain &SuccChain = *BlockToChain[BestSucc];
1925
1926
1927 SuccChain.UnscheduledPredecessors = 0;
1930 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1931 Chain.merge(BestSucc, &SuccChain);
1932 BB = *std::prev(Chain.end());
1933 }
1934
1935 LLVM_DEBUG(dbgs() << "Finished forming chain for header block "
1937}
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953bool MachineBlockPlacement::canMoveBottomBlockToTop(
1955 if (BottomBlock->pred_size() != 1)
1956 return true;
1959 return true;
1960
1962 if (OtherBB == BottomBlock)
1964 if (OtherBB == OldTop)
1965 return false;
1966
1967 return true;
1968}
1969
1970
1972MachineBlockPlacement::TopFallThroughFreq(const MachineBasicBlock *Top,
1973 const BlockFilterSet &LoopBlockSet) {
1976 BlockChain *PredChain = BlockToChain[Pred];
1977 if (!LoopBlockSet.count(Pred) &&
1978 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1979
1980
1982 bool TopOK = true;
1985 BlockChain *SuccChain = BlockToChain[Succ];
1986
1987
1988 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1989 (!SuccChain || Succ == *SuccChain->begin())) {
1990 TopOK = false;
1991 break;
1992 }
1993 }
1994 if (TopOK) {
1997 if (EdgeFreq > MaxFreq)
1998 MaxFreq = EdgeFreq;
1999 }
2000 }
2001 }
2002 return MaxFreq;
2003}
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026BlockFrequency MachineBlockPlacement::FallThroughGains(
2028 const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) {
2029 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2031 if (ExitBB)
2032 FallThrough2Exit =
2033 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, ExitBB);
2035 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, OldTop);
2036
2037
2041 if (!LoopBlockSet.count(Pred))
2042 continue;
2043 BlockChain *PredChain = BlockToChain[Pred];
2044 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2047 if (EdgeFreq > FallThroughFromPred) {
2048 FallThroughFromPred = EdgeFreq;
2049 BestPred = Pred;
2050 }
2051 }
2052 }
2053
2054
2055
2057 if (BestPred) {
2059 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2060 continue;
2061 if (ComputedEdges.contains(Succ))
2062 continue;
2063 BlockChain *SuccChain = BlockToChain[Succ];
2064 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2065 (SuccChain == BlockToChain[BestPred]))
2066 continue;
2067 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
2069 if (EdgeFreq > NewFreq)
2070 NewFreq = EdgeFreq;
2071 }
2072 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
2074 if (NewFreq > OrigEdgeFreq) {
2075
2076
2077
2080 }
2081 }
2082
2086 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2087 if (Gains > Lost)
2088 Result = Gains - Lost;
2090}
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2116 const BlockFilterSet &LoopBlockSet) {
2117
2118
2119
2120 BlockChain &HeaderChain = *BlockToChain[OldTop];
2121 if (!LoopBlockSet.count(*HeaderChain.begin()))
2122 return OldTop;
2123 if (OldTop != *HeaderChain.begin())
2124 return OldTop;
2125
2127 << "\n");
2128
2132 if (!LoopBlockSet.count(Pred))
2133 continue;
2134 if (Pred == L.getHeader())
2135 continue;
2137 << Pred->succ_size() << " successors, "
2138 << printBlockFreq(MBFI->getMBFI(), *Pred) << " freq\n");
2140 continue;
2141
2145 if (OtherBB == OldTop)
2147 }
2148
2149 if (!canMoveBottomBlockToTop(Pred, OldTop))
2150 continue;
2151
2153 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2155 (Gains > BestGains ||
2157 BestPred = Pred;
2158 BestGains = Gains;
2159 }
2160 }
2161
2162
2163 if (!BestPred) {
2165 return OldTop;
2166 }
2167
2168
2169 while (BestPred->pred_size() == 1 &&
2170 (*BestPred->pred_begin())->succ_size() == 1 &&
2171 *BestPred->pred_begin() != L.getHeader())
2173
2175 return BestPred;
2176}
2177
2178
2179
2180
2181
2183MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
2184 const BlockFilterSet &LoopBlockSet) {
2185
2186
2187
2188
2189
2190
2191
2193 return L.getHeader();
2194
2197 while (NewTop != OldTop) {
2198 OldTop = NewTop;
2199 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2200 if (NewTop != OldTop)
2201 ComputedEdges[NewTop] = {OldTop, false};
2202 }
2203 return NewTop;
2204}
2205
2206
2207
2208
2209
2210
2212MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
2213 const BlockFilterSet &LoopBlockSet,
2215
2216
2217
2218
2219
2220
2221
2222
2223 BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
2224 if (!LoopBlockSet.count(*HeaderChain.begin()))
2225 return nullptr;
2226
2228 unsigned BestExitLoopDepth = 0;
2230
2231
2232
2234
2238 BlockChain &Chain = *BlockToChain[MBB];
2239
2240
2241 if (MBB != *std::prev(Chain.end()))
2242 continue;
2243
2244
2245
2246
2247
2249 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
2250 bool HasLoopingSucc = false;
2253 continue;
2254 if (Succ == MBB)
2255 continue;
2256 BlockChain &SuccChain = *BlockToChain[Succ];
2257
2258 if (&Chain == &SuccChain) {
2260 << getBlockName(Succ) << " (chain conflict)\n");
2261 continue;
2262 }
2263
2265 if (LoopBlockSet.count(Succ)) {
2267 << getBlockName(Succ) << " (" << SuccProb << ")\n");
2268 HasLoopingSucc = true;
2269 continue;
2270 }
2271
2272 unsigned SuccLoopDepth = 0;
2274 SuccLoopDepth = ExitLoop->getLoopDepth();
2275 if (ExitLoop->contains(&L))
2276 BlocksExitingToOuterLoop.insert(MBB);
2277 }
2278
2279 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
2282 << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("
2283 << printBlockFreq(MBFI->getMBFI(), ExitEdgeFreq) << ")\n");
2284
2285
2286
2287
2289 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2290 ExitEdgeFreq > BestExitEdgeFreq ||
2292 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2293 BestExitEdgeFreq = ExitEdgeFreq;
2294 ExitingBB = MBB;
2295 }
2296 }
2297
2298 if (!HasLoopingSucc) {
2299
2300 ExitingBB = OldExitingBB;
2301 BestExitEdgeFreq = OldBestExitEdgeFreq;
2302 }
2303 }
2304
2305
2306 if (!ExitingBB) {
2308 dbgs() << " No other candidate exit blocks, using loop header\n");
2309 return nullptr;
2310 }
2311 if (L.getNumBlocks() == 1) {
2312 LLVM_DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
2313 return nullptr;
2314 }
2315
2316
2317
2318
2319 if (!BlocksExitingToOuterLoop.empty() &&
2320 !BlocksExitingToOuterLoop.count(ExitingBB))
2321 return nullptr;
2322
2324 << "\n");
2325 ExitFreq = BestExitEdgeFreq;
2326 return ExitingBB;
2327}
2328
2329
2330
2331
2332
2333bool MachineBlockPlacement::hasViableTopFallthrough(
2334 const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) {
2336 BlockChain *PredChain = BlockToChain[Pred];
2337 if (!LoopBlockSet.count(Pred) &&
2338 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2339
2340
2342 bool TopOK = true;
2345 BlockChain *SuccChain = BlockToChain[Succ];
2346
2347
2348 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2349 TopOK = false;
2350 break;
2351 }
2352 }
2353 if (TopOK)
2354 return true;
2355 }
2356 }
2357 return false;
2358}
2359
2360
2361
2362
2363
2364
2365
2366void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2369 const BlockFilterSet &LoopBlockSet) {
2370 if (!ExitingBB)
2371 return;
2372
2375
2376
2377 if (Bottom == ExitingBB)
2378 return;
2379
2380
2382 return;
2383
2384 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2385
2386
2387
2388
2389 if (ViableTopFallthrough) {
2391 BlockChain *SuccChain = BlockToChain[Succ];
2392 if (!LoopBlockSet.count(Succ) &&
2393 (!SuccChain || Succ == *SuccChain->begin()))
2394 return;
2395 }
2396
2397
2398
2399 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2400 if (FallThrough2Top >= ExitFreq)
2401 return;
2402 }
2403
2404 BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
2405 if (ExitIt == LoopChain.end())
2406 return;
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427 if (ViableTopFallthrough) {
2428 assert(std::next(ExitIt) != LoopChain.end() &&
2429 "Exit should not be last BB");
2431 if (ExitingBB->isSuccessor(NextBlockInChain))
2433 return;
2434 }
2435
2437 << " at bottom\n");
2438 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2439}
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454void MachineBlockPlacement::rotateLoopWithProfile(
2455 BlockChain &LoopChain, const MachineLoop &L,
2456 const BlockFilterSet &LoopBlockSet) {
2457 auto RotationPos = LoopChain.end();
2459
2460
2462 return;
2463
2465
2466
2467
2470 if (Scale == 0)
2472
2473
2475 };
2476
2477
2478
2479
2481 for (auto *Pred : ChainHeaderBB->predecessors()) {
2482 BlockChain *PredChain = BlockToChain[Pred];
2483 if (!LoopBlockSet.count(Pred) &&
2484 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2485 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2487 auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
2488
2489
2491 FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
2492 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2493 }
2494 }
2495
2496
2497
2498
2499
2501 for (auto *BB : LoopChain) {
2503 for (auto *Succ : BB->successors()) {
2504 BlockChain *SuccChain = BlockToChain[Succ];
2505 if (!LoopBlockSet.count(Succ) &&
2506 (!SuccChain || Succ == *SuccChain->begin())) {
2508 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2509 }
2510 }
2512 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2514 }
2515 }
2516
2517
2518
2519
2520 for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2521 EndIter = LoopChain.end();
2522 Iter != EndIter; Iter++, TailIter++) {
2523
2524
2525 if (TailIter == LoopChain.end())
2526 TailIter = LoopChain.begin();
2527
2528 auto TailBB = *TailIter;
2529
2530
2532
2533
2534
2535
2536 if (Iter != LoopChain.begin())
2537 Cost += HeaderFallThroughCost;
2538
2539
2540
2541 for (auto &ExitWithFreq : ExitsWithFreq)
2542 if (TailBB != ExitWithFreq.first)
2543 Cost += ExitWithFreq.second;
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559 if (TailBB->isSuccessor(*Iter)) {
2560 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2561 if (TailBB->succ_size() == 1)
2563 else if (TailBB->succ_size() == 2) {
2565 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2567 ? TailBBFreq * TailToHeadProb.getCompl()
2568 : TailToHeadFreq;
2570 ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
2571 }
2572 }
2573
2574 LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "
2577
2578 if (Cost < SmallestRotationCost) {
2579 SmallestRotationCost = Cost;
2580 RotationPos = Iter;
2581 }
2582 }
2583
2584 if (RotationPos != LoopChain.end()) {
2586 << " to the top\n");
2587 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2588 }
2589}
2590
2591
2592
2593
2594
2596MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
2597
2598
2599 struct MBBCompare {
2602 return X->getNumber() < Y->getNumber();
2603 }
2604 };
2605 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2618 for (auto *LoopPred : L.getHeader()->predecessors())
2619 if (.contains(LoopPred))
2620 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2622
2624 if (LoopBlockSet.count(LoopBB))
2625 continue;
2626 auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
2628 continue;
2629 BlockChain *Chain = BlockToChain[LoopBB];
2631 LoopBlockSet.insert(ChainBB);
2632 }
2633 } else
2634 LoopBlockSet.insert(L.block_begin(), L.block_end());
2635
2636
2637
2638
2639 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2640 return Ret;
2641}
2642
2643
2644
2645
2646
2647
2648
2649void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2650
2651
2653 buildLoopChains(*InnerLoop);
2654
2656 "BlockWorkList not empty when starting to build loop chains.");
2658 "EHPadWorkList not empty when starting to build loop chains.");
2659 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2660
2661
2662
2663
2664 bool RotateLoopWithProfile =
2667
2668
2669
2670
2671
2673
2674
2675
2676
2677
2678
2679
2680 PreferredLoopExit = nullptr;
2682 if (!RotateLoopWithProfile && LoopTop == L.getHeader())
2683 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2684
2685 BlockChain &LoopChain = *BlockToChain[LoopTop];
2686
2687
2688
2689
2691 assert(LoopChain.UnscheduledPredecessors == 0 &&
2692 "LoopChain should not have unscheduled predecessors.");
2693 UpdatedPreds.insert(&LoopChain);
2694
2696 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2697
2698 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2699
2700 if (RotateLoopWithProfile)
2701 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2702 else
2703 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2704
2706
2707 bool BadLoop = false;
2708 if (LoopChain.UnscheduledPredecessors) {
2709 BadLoop = true;
2710 dbgs() << "Loop chain contains a block without its preds placed!\n"
2711 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
2712 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
2713 }
2716 if (!LoopBlockSet.remove(ChainBB)) {
2717
2718
2719
2720 dbgs() << "Loop chain contains a block not contained by the loop!\n"
2721 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
2722 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
2723 << " Bad block: " << getBlockName(ChainBB) << "\n";
2724 }
2725 }
2726
2727 if (!LoopBlockSet.empty()) {
2728 BadLoop = true;
2730 dbgs() << "Loop contains blocks never placed into a chain!\n"
2731 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
2732 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
2733 << " Bad block: " << getBlockName(LoopBB) << "\n";
2734 }
2735 assert(!BadLoop && "Detected problems with the placement of this loop.");
2736 });
2737
2738 BlockWorkList.clear();
2739 EHPadWorkList.clear();
2740}
2741
2742void MachineBlockPlacement::buildCFGChains() {
2743
2744
2747 ++FI) {
2749 BlockChain *Chain =
2750 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
2751
2752
2753 while (true) {
2754 Cond.clear();
2757 break;
2758
2761
2762
2763 assert(NextFI != FE && "Can't fallthrough past the last block.");
2764 LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
2766 << "\n");
2767 Chain->merge(NextBB, nullptr);
2768#ifndef NDEBUG
2769 BlocksWithUnanalyzableExits.insert(&*BB);
2770#endif
2771 FI = NextFI;
2772 BB = NextBB;
2773 }
2774 }
2775
2776
2777 PreferredLoopExit = nullptr;
2779 buildLoopChains(*L);
2780
2782 "BlockWorkList should be empty before building final chain.");
2784 "EHPadWorkList should be empty before building final chain.");
2785
2788 fillWorkLists(&MBB, UpdatedPreds);
2789
2790 BlockChain &FunctionChain = *BlockToChain[&F->front()];
2791 buildChain(&F->front(), FunctionChain);
2792
2793#ifndef NDEBUG
2795#endif
2797
2798 bool BadFunc = false;
2799 FunctionBlockSetType FunctionBlockSet;
2801 FunctionBlockSet.insert(&MBB);
2802
2804 if (!FunctionBlockSet.erase(ChainBB)) {
2805 BadFunc = true;
2806 dbgs() << "Function chain contains a block not in the function!\n"
2807 << " Bad block: " << getBlockName(ChainBB) << "\n";
2808 }
2809
2810 if (!FunctionBlockSet.empty()) {
2811 BadFunc = true;
2812 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2813 dbgs() << "Function contains blocks never placed into a chain!\n"
2814 << " Bad block: " << getBlockName(RemainingBB) << "\n";
2815 }
2816 assert(!BadFunc && "Detected problems with the block placement.");
2817 });
2818
2819
2820
2822 F->getNumBlockIDs());
2823 {
2826 if (LastMBB != nullptr)
2827 OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB;
2828 LastMBB = &MBB;
2829 }
2830 OriginalLayoutSuccessors[F->back().getNumber()] = nullptr;
2831 }
2832
2833
2835 LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");
2837 LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
2838 : " ... ")
2841 F->splice(InsertPos, ChainBB);
2842 else
2843 ++InsertPos;
2844
2845
2846 if (ChainBB == *FunctionChain.begin())
2847 continue;
2849
2850
2851
2852
2853 Cond.clear();
2855
2856#ifndef NDEBUG
2857 if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
2858
2859
2860
2863 "Unexpected block with un-analyzable fallthrough!");
2864 Cond.clear();
2865 TBB = FBB = nullptr;
2866 }
2867#endif
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2892 }
2893 }
2894
2895
2896 Cond.clear();
2901 }
2902
2903 BlockWorkList.clear();
2904 EHPadWorkList.clear();
2905}
2906
2907void MachineBlockPlacement::optimizeBranches() {
2908 BlockChain &FunctionChain = *BlockToChain[&F->front()];
2910
2911
2912
2913
2914
2915
2916
2918 Cond.clear();
2921 continue;
2922 if ( || !FBB || Cond.empty())
2923 continue;
2924
2925
2926
2928 continue;
2929
2930
2933 continue;
2935 continue;
2936 LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "
2939 << "\n");
2940 auto Dl = ChainBB->findBranchDebugLoc();
2943 }
2944}
2945
2946void MachineBlockPlacement::alignBlocks() {
2947
2948
2949
2950
2951
2953 if (F->getFunction().hasMinSize() ||
2955 return;
2956 }
2957
2958 BlockChain &FunctionChain = *BlockToChain[&F->front()];
2959
2960 if (FunctionChain.begin() == FunctionChain.end())
2961 return;
2962
2964 BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
2965 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
2967 if (ChainBB == *FunctionChain.begin())
2968 continue;
2969
2970
2971
2972
2973
2975 if (!L)
2976 continue;
2977
2979 unsigned MDAlign = 1;
2980 MDNode *LoopID = L->getLoopID();
2981 if (LoopID) {
2983 MDNode *MD = dyn_cast(MDO);
2984 if (MD == nullptr)
2985 continue;
2987 if (S == nullptr)
2988 continue;
2989 if (S->getString() == "llvm.loop.align") {
2991 "per-loop align metadata should have two operands.");
2992 MDAlign =
2993 mdconst::extract(MD->getOperand(1))->getZExtValue();
2994 assert(MDAlign >= 1 && "per-loop align value must be positive.");
2995 }
2996 }
2997 }
2998
2999
3000 const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));
3001 if (LoopAlign == 1)
3002 continue;
3003
3004
3005
3007 if (Freq < WeightedEntryFreq)
3008 continue;
3009
3010
3011
3013 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
3014 if (Freq < (LoopHeaderFreq * ColdProb))
3015 continue;
3016
3017
3020 continue;
3021
3022
3023
3026
3027 auto DetermineMaxAlignmentPadding = [&]() {
3028
3029 unsigned MaxBytes;
3032 else
3034 ChainBB->setMaxBytesForAlignment(MaxBytes);
3035 };
3036
3037
3038
3039 if (!LayoutPred->isSuccessor(ChainBB)) {
3040 ChainBB->setAlignment(LoopAlign);
3041 DetermineMaxAlignmentPadding();
3042 continue;
3043 }
3044
3045
3046
3047
3048
3051 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3052 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3053 ChainBB->setAlignment(LoopAlign);
3054 DetermineMaxAlignmentPadding();
3055 }
3056 }
3057
3058 const bool HasMaxBytesOverride =
3060
3062
3064 if (HasMaxBytesOverride)
3067 else
3069 }
3071
3072
3073 for (auto MBI = std::next(F->begin()), MBE = F->end(); MBI != MBE; ++MBI) {
3074 auto LayoutPred = std::prev(MBI);
3076 if (HasMaxBytesOverride)
3079 else
3081 }
3082 }
3083 }
3084}
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3105 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3106 bool Removed, DuplicatedToLPred;
3107 bool DuplicatedToOriginalLPred;
3108 Removed = maybeTailDuplicateBlock(
3109 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3110 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3111 if (!Removed)
3112 return false;
3113 DuplicatedToOriginalLPred = DuplicatedToLPred;
3114
3115
3116
3117
3118 while (DuplicatedToLPred && Removed) {
3120
3121
3122
3123
3124
3125 BlockChain::iterator ChainEnd = Chain.end();
3126 DupBB = *(--ChainEnd);
3127
3128 if (ChainEnd == Chain.begin())
3129 break;
3130 DupPred = *std::prev(ChainEnd);
3131 Removed = maybeTailDuplicateBlock(
3132 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3133 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3134 }
3135
3136
3137
3138
3139
3140 LPred = *std::prev(Chain.end());
3141 if (DuplicatedToOriginalLPred)
3142 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3143 return true;
3144}
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159bool MachineBlockPlacement::maybeTailDuplicateBlock(
3162 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3163 bool &DuplicatedToLPred) {
3164 DuplicatedToLPred = false;
3165 if (!shouldTailDuplicate(BB))
3166 return false;
3167
3169 << "\n");
3170
3171
3172
3173 bool Removed = false;
3175
3176 Removed = true;
3177
3178
3179 bool InWorkList = true;
3180
3181 if (BlockToChain.count(RemBB)) {
3182 BlockChain *Chain = BlockToChain[RemBB];
3183 InWorkList = Chain->UnscheduledPredecessors == 0;
3184 Chain->remove(RemBB);
3185 BlockToChain.erase(RemBB);
3186 }
3187
3188
3189 if (&(*PrevUnplacedBlockIt) == RemBB) {
3190 PrevUnplacedBlockIt++;
3191 }
3192
3193
3194 if (InWorkList) {
3196 if (RemBB->isEHPad())
3197 RemoveList = EHPadWorkList;
3199 }
3200
3201
3202 if (BlockFilter) {
3203 auto It = llvm::find(*BlockFilter, RemBB);
3204
3205
3206 if (It != BlockFilter->end()) {
3207 if (It < PrevUnplacedBlockInFilterIt) {
3209
3210
3211 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3212 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance;
3213 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3214 (void)PrevBB;
3215 } else if (It == PrevUnplacedBlockInFilterIt)
3216
3217
3218 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3219 else
3220 BlockFilter->erase(It);
3221 }
3222 }
3223
3224
3225 MLI->removeBlock(RemBB);
3226 if (RemBB == PreferredLoopExit)
3227 PreferredLoopExit = nullptr;
3228
3230 << "\n");
3231 };
3232 auto RemovalCallbackRef =
3234
3236 bool IsSimple = TailDup.isSimpleBB(BB);
3239 if (F->getFunction().hasProfileData()) {
3240
3241 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3242 if (CandidatePreds.size() == 0)
3243 return false;
3245 CandidatePtr = &CandidatePreds;
3246 }
3248 &RemovalCallbackRef, CandidatePtr);
3249
3250
3251 DuplicatedToLPred = false;
3253
3254 BlockChain *PredChain = BlockToChain[Pred];
3255 if (Pred == LPred)
3256 DuplicatedToLPred = true;
3257 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3258 PredChain == &Chain)
3259 continue;
3261 if (BlockFilter && !BlockFilter->count(NewSucc))
3262 continue;
3263 BlockChain *NewChain = BlockToChain[NewSucc];
3264 if (NewChain != &Chain && NewChain != PredChain)
3265 NewChain->UnscheduledPredecessors++;
3266 }
3267 }
3268 return Removed;
3269}
3270
3271
3275 if (.isPHI() &&
.isMetaInstruction())
3277 }
3279}
3280
3281
3282
3283
3286}
3287
3288
3289bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3291 BlockFilterSet *BlockFilter) {
3292 if (BB == Pred)
3293 return false;
3294 if (BlockFilter && !BlockFilter->count(Pred))
3295 return false;
3296 BlockChain *PredChain = BlockToChain[Pred];
3297 if (PredChain && (Pred != *std::prev(PredChain->end())))
3298 return false;
3299
3300
3303 if (Succ != BB) {
3304 if (BlockFilter && !BlockFilter->count(Succ))
3305 continue;
3306 BlockChain *SuccChain = BlockToChain[Succ];
3307 if (SuccChain && (Succ != *SuccChain->begin()))
3308 continue;
3310 if (SuccProb > BestProb)
3311 BestProb = SuccProb;
3312 }
3313
3315 if (BBProb <= BestProb)
3316 return false;
3317
3318
3319
3320 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3321 BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3322 return Gain > scaleThreshold(BB);
3323}
3324
3325
3326
3327void MachineBlockPlacement::findDuplicateCandidates(
3329 BlockFilterSet *BlockFilter) {
3335
3336
3339 };
3341 return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B);
3342 };
3345
3346 auto SuccIt = Succs.begin();
3347 if (SuccIt != Succs.end()) {
3349 }
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3393 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3394
3396
3397
3398 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3399 Fallthrough = Pred;
3400 if (SuccIt != Succs.end())
3401 SuccIt++;
3402 }
3403 continue;
3404 }
3405
3406 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3408 if (SuccIt == Succs.end()) {
3409
3410 if (Succs.size() > 0)
3411 DupCost += PredFreq;
3412 } else {
3413
3414 DupCost += PredFreq;
3416 }
3417
3418 assert(OrigCost >= DupCost);
3419 OrigCost -= DupCost;
3420 if (OrigCost > BBDupThreshold) {
3422 if (SuccIt != Succs.end())
3423 SuccIt++;
3424 }
3425 }
3426
3427
3428
3429 if (!Fallthrough) {
3430 if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {
3431 Candidates[0] = Candidates.back();
3433 }
3434 }
3435}
3436
3437void MachineBlockPlacement::initTailDupThreshold() {
3439 if (F->getFunction().hasProfileData()) {
3440
3443 UseProfileCount = true;
3444 DupThreshold =
3446 } else {
3447
3451 if (Freq > MaxFreq)
3452 MaxFreq = Freq;
3453 }
3454
3456 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
3457 UseProfileCount = false;
3458 }
3459 }
3460
3462
3466
3467
3468
3469 if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) {
3470
3471
3472
3476 }
3477
3478
3479
3481 (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive ||
3483 TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
3484}
3485
3486bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
3488 return false;
3489
3490
3491 if (std::next(MF.begin()) == MF.end())
3492 return false;
3493
3494 F = &MF;
3495 MBPI = &getAnalysis().getMBPI();
3496 MBFI = std::make_unique(
3497 getAnalysis().getMBFI());
3498 MLI = &getAnalysis().getLI();
3501 MPDT = nullptr;
3502 PSI = &getAnalysis().getPSI();
3503 PassConfig = &getAnalysis();
3504
3505
3506
3507 PreferredLoopExit = nullptr;
3508
3510 "BlockToChain map should be empty before starting placement.");
3512 "Computed Edge map should be empty before starting placement.");
3513
3514
3515 initTailDupThreshold();
3516
3517 const bool OptForSize =
3519
3520
3521
3522 bool UseExtTspForPerf = false;
3523 bool UseExtTspForSize = false;
3525 UseExtTspForPerf =
3529 }
3530
3531
3532 if (allowTailDupPlacement()) {
3533 MPDT = &getAnalysis().getPostDomTree();
3534 if (OptForSize)
3535 TailDupSize = 1;
3536 const bool PreRegAlloc = false;
3537 TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3538 true, TailDupSize);
3539 if (!UseExtTspForSize)
3540 precomputeTriangleChains();
3541 }
3542
3543
3544 if (!UseExtTspForSize)
3545 buildCFGChains();
3546
3547
3548
3549
3553
3554 if (EnableTailMerge) {
3556 BranchFolder BF(true, false,
3558
3560 true)) {
3561
3562 if (MPDT)
3564 if (!UseExtTspForSize) {
3565
3566 BlockToChain.clear();
3567 ComputedEdges.clear();
3569 buildCFGChains();
3570 }
3571 }
3572 }
3573
3574
3575
3576
3577 if (UseExtTspForPerf || UseExtTspForSize) {
3579 !(UseExtTspForPerf && UseExtTspForSize) &&
3580 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3581 applyExtTsp(UseExtTspForSize);
3582 createCFGChainExtTsp();
3583 }
3584
3585 optimizeBranches();
3586 alignBlocks();
3587
3588 BlockToChain.clear();
3589 ComputedEdges.clear();
3591
3592
3598 MBFI->view("MBP." + MF.getName(), false);
3599 }
3600
3601
3602
3603 return true;
3604}
3605
3606void MachineBlockPlacement::applyExtTsp(bool OptForSize) {
3607
3609 BlockIndex.reserve(F->size());
3610 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3611 CurrentBlockOrder.reserve(F->size());
3612 size_t NumBlocks = 0;
3614 BlockIndex[&MBB] = NumBlocks++;
3615 CurrentBlockOrder.push_back(&MBB);
3616 }
3617
3624
3626 BlockCounts[BlockIndex[&MBB]] = OptForSize ? 1 : BlockFreq.getFrequency();
3627
3628
3629
3630
3631
3632
3633 auto NonDbgInsts =
3635 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3636 BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;
3637
3638
3639 if (OptForSize) {
3640 Cond.clear();
3643 continue;
3644
3646
3647
3648
3652 if (FBB && FBB != FTB)
3654 if (FTB)
3656
3657
3658
3659 const uint64_t Freq = Succs.size() == 1 ? 110 : 100;
3661 JumpCounts.push_back({BlockIndex[&MBB], BlockIndex[Succ], Freq});
3662 } else {
3667 {BlockIndex[&MBB], BlockIndex[Succ], JumpFreq.getFrequency()});
3668 }
3669 }
3670 }
3671
3672 LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()
3673 << " with profile = " << F->getFunction().hasProfileData()
3674 << " (" << F->getName() << ")" << "\n");
3675
3676 const double OrgScore = calcExtTspScore(BlockSizes, JumpCounts);
3678
3679
3680 auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
3681 std::vector<const MachineBasicBlock *> NewBlockOrder;
3682 NewBlockOrder.reserve(F->size());
3684 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3685 }
3686 const double OptScore = calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3688
3689
3690 if (OptForSize && OrgScore > OptScore)
3691 assignBlockOrder(CurrentBlockOrder);
3692 else
3693 assignBlockOrder(NewBlockOrder);
3694}
3695
3696void MachineBlockPlacement::assignBlockOrder(
3697 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3698 assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");
3699 F->RenumberBlocks();
3700
3701
3702
3703
3704 MPDT = nullptr;
3705
3706 bool HasChanges = false;
3707 for (size_t I = 0; I < NewBlockOrder.size(); I++) {
3708 if (NewBlockOrder[I] != F->getBlockNumbered(I)) {
3709 HasChanges = true;
3710 break;
3711 }
3712 }
3713
3714 if (!HasChanges)
3715 return;
3716
3720 }
3721
3722
3725 NewIndex[MBB] = NewIndex.size();
3726 }
3728 return NewIndex[&L] < NewIndex[&R];
3729 });
3730
3731
3732
3738 auto *FTMBB = PrevFallThroughs[MBB.getNumber()];
3739
3740
3741
3742 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3744 }
3745
3746
3747 Cond.clear();
3750 continue;
3752 }
3753}
3754
3755void MachineBlockPlacement::createCFGChainExtTsp() {
3756 BlockToChain.clear();
3757 ComputedEdges.clear();
3759
3761 BlockChain *FunctionChain =
3762 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);
3763
3765 if (HeadBB == &MBB)
3766 continue;
3767 FunctionChain->merge(&MBB, nullptr);
3768 }
3769}
3770
3771namespace {
3772
3773
3774
3775
3776
3777
3778
3780
3782
3783
3785
3786public:
3787 static char ID;
3788
3791 }
3792
3794
3800 }
3801};
3802
3803}
3804
3805char MachineBlockPlacementStats::ID = 0;
3806
3808
3810 "Basic Block Placement Stats", false, false)
3815
3816bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
3817
3818 if (std::next(F.begin()) == F.end())
3819 return false;
3820
3822 return false;
3823
3824 MBPI = &getAnalysis().getMBPI();
3825 MBFI = &getAnalysis().getMBFI();
3826
3830 (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3832 (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
3834
3836 continue;
3837
3840 ++NumBranches;
3842 }
3843 }
3844
3845 return false;
3846}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
block placement Basic Block Placement Stats
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
Branch Probability Basic Block Placement
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
unify loop Fixup each natural loop to have a single exit block
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
Utility class to perform tail duplication.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
Target-Independent Code Generator Pass Configuration Options.
bool getEnableTailMerge() const
CodeGenOptLevel getOptLevel() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
A raw_ostream that writes to an std::string.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > ApplyExtTspWithoutProfile
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
cl::opt< unsigned > StaticLikelyProb
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
void initializeMachineBlockPlacementPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.