LLVM: lib/CodeGen/MachineBlockPlacement.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
67#include
68#include
69#include
70#include
71#include
72#include
73#include
74#include
75#include
76
77using namespace llvm;
78
79#define DEBUG_TYPE "block-placement"
80
81STATISTIC(NumCondBranches, "Number of conditional branches");
82STATISTIC(NumUncondBranches, "Number of unconditional branches");
84 "Potential frequency of taking conditional branches");
86 "Potential frequency of taking unconditional branches");
87
89 "align-all-blocks",
90 cl::desc("Force the alignment of all blocks in the function in log2 format "
91 "(e.g 4 means align on 16B boundaries)."),
93
95 "align-all-nofallthru-blocks",
96 cl::desc("Force the alignment of all blocks that have no fall-through "
97 "predecessors (i.e. don't add nops that are executed). In log2 "
98 "format (e.g 4 means align on 16B boundaries)."),
100
102 "max-bytes-for-alignment",
103 cl::desc("Forces the maximum bytes allowed to be emitted when padding for "
104 "alignment"),
106
108 "block-placement-predecessor-limit",
109 cl::desc("For blocks with more predecessors, certain layout optimizations"
110 "will be disabled to prevent quadratic compile time."),
112
113
115 "block-placement-exit-block-bias",
116 cl::desc("Block frequency percentage a loop exit block needs "
117 "over the original exit to be considered the new exit."),
119
120
121
122
124 "loop-to-cold-block-ratio",
125 cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
126 "(frequency of block) is greater than this ratio"),
128
131 cl::desc("Force outlining cold blocks from loops."),
133
136 cl::desc("Model the cost of loop rotation more "
137 "precisely by using profile data."),
139
142 cl::desc("Force the use of precise cost "
143 "loop rotation strategy."),
145
147 "misfetch-cost",
148 cl::desc("Cost that models the probabilistic risk of an instruction "
149 "misfetch due to a jump comparing to falling through, whose cost "
150 "is zero."),
152
154 cl::desc("Cost of jump instructions."),
158 cl::desc("Perform tail duplication during placement. "
159 "Creates more fallthrough opportunities in "
160 "outline branches."),
162
165 cl::desc("Perform branch folding during placement. "
166 "Reduces code size."),
168
169
171 "tail-dup-placement-threshold",
172 cl::desc("Instruction cutoff for tail duplication during layout. "
173 "Tail merging during layout is forced to have a threshold "
174 "that won't conflict."),
176
177
179 "tail-dup-placement-aggressive-threshold",
180 cl::desc("Instruction cutoff for aggressive tail duplication during "
181 "layout. Used at -O3. Tail merging during layout is forced to "
182 "have a threshold that won't conflict."),
184
185
187 "tail-dup-placement-penalty",
189 "Cost penalty for blocks that can avoid breaking CFG by copying. "
190 "Copying can increase fallthrough, but it also increases icache "
191 "pressure. This parameter controls the penalty to account for that. "
192 "Percent as integer."),
194
195
197 "tail-dup-profile-percent-threshold",
198 cl::desc("If profile count information is used in tail duplication cost "
199 "model, the gained fall through number from tail duplication "
200 "should be at least this percent of hot count."),
202
203
205 "triangle-chain-count",
206 cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
207 "triangle tail duplication heuristic to kick in. 0 to disable."),
209
210
211
212
213
214
216 "renumber-blocks-before-view",
218 "If true, basic blocks are re-numbered before MBP layout is printed "
219 "into a dot graph. Only used when a function is being printed."),
221
223 "ext-tsp-block-placement-max-blocks",
224 cl::desc("Maximum number of basic blocks in a function to run ext-TSP "
225 "block placement."),
227
228
231 cl::desc("Use ext-tsp for size-aware block placement."));
232
233namespace llvm {
238
239
240
241
243
244
245
247}
248
249namespace {
250
251class BlockChain;
252
253
255
256
257
258
259
260
261
262
263
264
265
266
267class BlockChain {
268
269
270
271
273
274
275
276
277
278
279
280 BlockToChainMapType &BlockToChain;
281
282public:
283
284
285
286
287
288 BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
289 : Blocks(1, BB), BlockToChain(BlockToChain) {
290 assert(BB && "Cannot create a chain with a null basic block");
291 BlockToChain[BB] = this;
292 }
293
294
297
298
301
302
305
307 for (iterator i = begin(); i != end(); ++i) {
308 if (*i == BB) {
310 return true;
311 }
312 }
313 return false;
314 }
315
316
317
318
319
320
321
323 assert(BB && "Can't merge a null block.");
324 assert(!Blocks.empty() && "Can't merge into an empty chain.");
325
326
327 if (!Chain) {
328 assert(!BlockToChain[BB] &&
329 "Passed chain is null, but BB has entry in BlockToChain.");
331 BlockToChain[BB] = this;
332 return;
333 }
334
335 assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");
336 assert(Chain->begin() != Chain->end());
337
338
339
342 assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");
343 BlockToChain[ChainBB] = this;
344 }
345 }
346
347#ifndef NDEBUG
348
351 MBB->dump();
352 }
353#endif
354
355
356
357
358
359
360
361
362
363
364 unsigned UnscheduledPredecessors = 0;
365};
366
367class MachineBlockPlacement {
368
369 using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
370
371
372 struct BlockAndTailDupResult {
373 MachineBasicBlock *BB = nullptr;
374 bool ShouldTailDup;
375 };
376
377
378 struct WeightedEdge {
379 BlockFrequency Weight;
380 MachineBasicBlock *Src = nullptr;
381 MachineBasicBlock *Dest = nullptr;
382 };
383
384
387
388
389 DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
390
391
392 MachineFunction *F = nullptr;
393
394
395 const MachineBranchProbabilityInfo *MBPI = nullptr;
396
397
398 std::unique_ptr MBFI;
399
400
401 MachineLoopInfo *MLI = nullptr;
402
403
404
405
406 MachineBasicBlock *PreferredLoopExit = nullptr;
407
408
409 const TargetInstrInfo *TII = nullptr;
410
411
412 const TargetLoweringBase *TLI = nullptr;
413
414
415 MachinePostDominatorTree *MPDT = nullptr;
416
417 ProfileSummaryInfo *PSI = nullptr;
418
419
420
421 bool AllowTailMerge;
422
424
425
426
427
428
429
430 TailDuplicator TailDup;
431
432
433 BlockFrequency DupThreshold;
434
435 unsigned TailDupSize;
436
437
438
439 bool UseProfileCount = false;
440
441
442
443
444
445
446
447
448 SpecificBumpPtrAllocator ChainAllocator;
449
450
451
452
453
454
455
456 DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
457
458#ifndef NDEBUG
459
460
461
462
463 SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
464#endif
465
466
467
468 BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {
469 if (UseProfileCount) {
470 auto Count = MBFI->getBlockProfileCount(BB);
472 return BlockFrequency(*Count);
473 else
474 return BlockFrequency(0);
475 } else
476 return MBFI->getBlockFreq(BB);
477 }
478
479
480 BlockFrequency scaleThreshold(MachineBasicBlock *BB);
481 void initTailDupThreshold();
482
483
484
485 void markChainSuccessors(const BlockChain &Chain,
486 const MachineBasicBlock *LoopHeaderBB,
487 const BlockFilterSet *BlockFilter = nullptr);
488
489
490
491 void markBlockSuccessors(const BlockChain &Chain, const MachineBasicBlock *BB,
492 const MachineBasicBlock *LoopHeaderBB,
493 const BlockFilterSet *BlockFilter = nullptr);
494
495 BranchProbability
496 collectViableSuccessors(const MachineBasicBlock *BB, const BlockChain &Chain,
497 const BlockFilterSet *BlockFilter,
498 SmallVector<MachineBasicBlock *, 4> &Successors);
499 bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
500 BlockFilterSet *BlockFilter);
501 void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
502 MachineBasicBlock *BB,
503 BlockFilterSet *BlockFilter);
504 bool repeatedlyTailDuplicateBlock(
505 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
506 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
507 BlockFilterSet *BlockFilter,
510 bool
511 maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
512 BlockChain &Chain, BlockFilterSet *BlockFilter,
515 bool &DuplicatedToLPred);
516 bool hasBetterLayoutPredecessor(const MachineBasicBlock *BB,
517 const MachineBasicBlock *Succ,
518 const BlockChain &SuccChain,
519 BranchProbability SuccProb,
520 BranchProbability RealSuccProb,
521 const BlockChain &Chain,
522 const BlockFilterSet *BlockFilter);
523 BlockAndTailDupResult selectBestSuccessor(const MachineBasicBlock *BB,
524 const BlockChain &Chain,
525 const BlockFilterSet *BlockFilter);
526 MachineBasicBlock *
527 selectBestCandidateBlock(const BlockChain &Chain,
528 SmallVectorImpl<MachineBasicBlock *> &WorkList);
529 MachineBasicBlock *
530 getFirstUnplacedBlock(const BlockChain &PlacedChain,
532 MachineBasicBlock *
533 getFirstUnplacedBlock(const BlockChain &PlacedChain,
535 const BlockFilterSet *BlockFilter);
536
537
538
539
540
541
542 void fillWorkLists(const MachineBasicBlock *MBB,
543 SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
544 const BlockFilterSet *BlockFilter);
545
546 void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
547 BlockFilterSet *BlockFilter = nullptr);
548 bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
549 const MachineBasicBlock *OldTop);
550 bool hasViableTopFallthrough(const MachineBasicBlock *Top,
551 const BlockFilterSet &LoopBlockSet);
552 BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
553 const BlockFilterSet &LoopBlockSet);
554 BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
555 const MachineBasicBlock *OldTop,
556 const MachineBasicBlock *ExitBB,
557 const BlockFilterSet &LoopBlockSet);
558 MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
559 const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 MachineBasicBlock *findBestLoopTop(const MachineLoop &L,
562 const BlockFilterSet &LoopBlockSet);
563 MachineBasicBlock *findBestLoopExit(const MachineLoop &L,
564 const BlockFilterSet &LoopBlockSet,
565 BlockFrequency &ExitFreq);
566 BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
567 void buildLoopChains(const MachineLoop &L);
568 void rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
569 BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
570 void rotateLoopWithProfile(BlockChain &LoopChain, const MachineLoop &L,
571 const BlockFilterSet &LoopBlockSet);
572 void buildCFGChains();
573 void optimizeBranches();
574 void alignBlocks();
575
576
577 bool shouldTailDuplicate(MachineBasicBlock *BB);
578
579
580 bool isProfitableToTailDup(const MachineBasicBlock *BB,
581 const MachineBasicBlock *Succ,
582 BranchProbability QProb, const BlockChain &Chain,
583 const BlockFilterSet *BlockFilter);
584
585
586 bool isTrellis(const MachineBasicBlock *BB,
587 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
588 const BlockChain &Chain, const BlockFilterSet *BlockFilter);
589
590
591 BlockAndTailDupResult getBestTrellisSuccessor(
592 const MachineBasicBlock *BB,
593 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
594 BranchProbability AdjustedSumProb, const BlockChain &Chain,
595 const BlockFilterSet *BlockFilter);
596
597
598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
599 const MachineBasicBlock *BB,
601
602
603
604 bool canTailDuplicateUnplacedPreds(const MachineBasicBlock *BB,
605 MachineBasicBlock *Succ,
606 const BlockChain &Chain,
607 const BlockFilterSet *BlockFilter);
608
609
610
611 void precomputeTriangleChains();
612
613
614 void applyExtTsp(bool OptForSize);
615
616
617 void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);
618
619
620 void createCFGChainExtTsp();
621
622public:
623 MachineBlockPlacement(const MachineBranchProbabilityInfo *MBPI,
624 MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
625 std::unique_ptr MBFI,
626 MachinePostDominatorTree *MPDT, bool AllowTailMerge)
627 : MBPI(MBPI), MBFI(std::move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
628 AllowTailMerge(AllowTailMerge) {};
629
630 bool run(MachineFunction &F);
631
632 static bool allowTailDupPlacement(MachineFunction &MF) {
634 }
635};
636
638public:
639 static char ID;
640
641 MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {
643 }
644
645 bool runOnMachineFunction(MachineFunction &MF) override {
647 return false;
648
649 auto *MBPI =
650 &getAnalysis().getMBPI();
651 auto MBFI = std::make_unique(
652 getAnalysis().getMBFI());
653 auto *MLI = &getAnalysis().getLI();
654 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
655 ? &getAnalysis()
656 .getPostDomTree()
657 : nullptr;
658 auto *PSI = &getAnalysis().getPSI();
659 auto *PassConfig = &getAnalysis();
660 bool AllowTailMerge = PassConfig->getEnableTailMerge();
661 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
662 AllowTailMerge)
663 .run(MF);
664 }
665
666 void getAnalysisUsage(AnalysisUsage &AU) const override {
667 AU.addRequired();
668 AU.addRequired();
670 AU.addRequired();
671 AU.addRequired();
672 AU.addRequired();
675 }
676};
677
678}
679
680char MachineBlockPlacementLegacy::ID = 0;
681
683
685 "Branch Probability Basic Block Placement", false, false)
692 "Branch Probability Basic Block Placement", false, false)
693
694#ifndef NDEBUG
695
696
697
699 std::string Result;
702 OS << " ('" << BB->getName() << "')";
704 return Result;
705}
706#endif
707
708
709
710
711
712
713
714void MachineBlockPlacement::markChainSuccessors(
716 const BlockFilterSet *BlockFilter) {
717
718
719 for (MachineBasicBlock *MBB : Chain) {
720 markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
721 }
722}
723
724
725
726
727
728
729
730void MachineBlockPlacement::markBlockSuccessors(
731 const BlockChain &Chain, const MachineBasicBlock *MBB,
732 const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {
733
734
735
736
737 for (MachineBasicBlock *Succ : MBB->successors()) {
738 if (BlockFilter && !BlockFilter->count(Succ))
739 continue;
740 BlockChain &SuccChain = *BlockToChain[Succ];
741
742 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
743 continue;
744
745
746
747 if (SuccChain.UnscheduledPredecessors == 0 ||
748 --SuccChain.UnscheduledPredecessors > 0)
749 continue;
750
751 auto *NewBB = *SuccChain.begin();
752 if (NewBB->isEHPad())
754 else
756 }
757}
758
759
760
761
762
763BranchProbability MachineBlockPlacement::collectViableSuccessors(
764 const MachineBasicBlock *BB, const BlockChain &Chain,
765 const BlockFilterSet *BlockFilter,
766 SmallVector<MachineBasicBlock *, 4> &Successors) {
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
784 for (MachineBasicBlock *Succ : BB->successors()) {
785 bool SkipSucc = false;
786 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
787 SkipSucc = true;
788 } else {
789 BlockChain *SuccChain = BlockToChain[Succ];
790 if (SuccChain == &Chain) {
791 SkipSucc = true;
792 } else if (Succ != *SuccChain->begin()) {
794 << " -> Mid chain!\n");
795 continue;
796 }
797 }
798 if (SkipSucc)
800 else
802 }
803
804 return AdjustedSumProb;
805}
806
807
808
809static BranchProbability
815 if (SuccProbN >= SuccProbD)
817 else
819
820 return SuccProb;
821}
822
823
824static bool
828 return false;
829
830 if (Successors.count(&BB))
831 return false;
833 if (!Successors.count(Succ))
834 return false;
835 return true;
836}
837
838
839
840
841bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
842
843
844
845 bool IsSimple = TailDup.isSimpleBB(BB);
846
848 return false;
850}
851
852
853
854
855
856
857
862 return (Gain / ThresholdProb) >= EntryFreq;
863}
864
865
866
867
868
869
870bool MachineBlockPlacement::isProfitableToTailDup(
871 const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
872 BranchProbability QProb, const BlockChain &Chain,
873 const BlockFilterSet *BlockFilter) {
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897 MachineBasicBlock *PDom = nullptr;
898 SmallVector<MachineBasicBlock *, 4> SuccSuccs;
899
900 auto AdjustedSuccSumProb =
901 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
903 auto BBFreq = MBFI->getBlockFreq(BB);
904 auto SuccFreq = MBFI->getBlockFreq(Succ);
905 BlockFrequency P = BBFreq * PProb;
906 BlockFrequency Qout = BBFreq * QProb;
907 BlockFrequency EntryFreq = MBFI->getEntryFreq();
908
909
910 if (SuccSuccs.size() == 0)
912
914
915 for (MachineBasicBlock *SuccSucc : SuccSuccs) {
917 if (Prob > BestSuccSucc)
918 BestSuccSucc = Prob;
919 if (PDom == nullptr)
920 if (MPDT->dominates(SuccSucc, Succ)) {
921 PDom = SuccSucc;
922 break;
923 }
924 }
925
926
927 auto SuccBestPred = BlockFrequency(0);
928 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
929 if (SuccPred == Succ || SuccPred == BB ||
930 BlockToChain[SuccPred] == &Chain ||
931 (BlockFilter && !BlockFilter->count(SuccPred)))
932 continue;
933 auto Freq =
934 MBFI->getBlockFreq(SuccPred) * MBPI->getEdgeProbability(SuccPred, Succ);
935 if (Freq > SuccBestPred)
936 SuccBestPred = Freq;
937 }
938
939 BlockFrequency Qin = SuccBestPred;
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959 if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
960 BranchProbability UProb = BestSuccSucc;
961 BranchProbability VProb = AdjustedSuccSumProb - UProb;
962 BlockFrequency F = SuccFreq - Qin;
963 BlockFrequency V = SuccFreq * VProb;
964 BlockFrequency QinU = std::min(Qin, F) * UProb;
965 BlockFrequency BaseCost = P + V;
966 BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
968 }
970 BranchProbability VProb = AdjustedSuccSumProb - UProb;
971 BlockFrequency U = SuccFreq * UProb;
972 BlockFrequency V = SuccFreq * VProb;
973 BlockFrequency F = SuccFreq - Qin;
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003 if (UProb > AdjustedSuccSumProb / 2 &&
1004 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
1005 Chain, BlockFilter))
1006
1008 (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
1009 EntryFreq);
1010
1012 (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
1013 std::max(Qin, F) * UProb),
1014 EntryFreq);
1015}
1016
1017
1018
1019
1020
1021
1022
1023
1024bool MachineBlockPlacement::isTrellis(
1025 const MachineBasicBlock *BB,
1026 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1027 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
1028
1029
1030 if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
1031 return false;
1032
1033 SmallPtrSet<const MachineBasicBlock *, 2> Successors(llvm::from_range,
1035
1036 SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
1037
1038 for (MachineBasicBlock *Succ : ViableSuccs) {
1039
1040
1042 return false;
1043
1044 int PredCount = 0;
1045 for (auto *SuccPred : Succ->predecessors()) {
1046
1047 if (Successors.count(SuccPred)) {
1048
1049 for (MachineBasicBlock *CheckSucc : SuccPred->successors())
1050 if (!Successors.count(CheckSucc))
1051 return false;
1052 continue;
1053 }
1054 const BlockChain *PredChain = BlockToChain[SuccPred];
1055 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1056 PredChain == &Chain || PredChain == BlockToChain[Succ])
1057 continue;
1058 ++PredCount;
1059
1060 if (!SeenPreds.insert(SuccPred).second)
1061 continue;
1063 return false;
1064 }
1065
1066
1067 if (PredCount < 1)
1068 return false;
1069 }
1070 return true;
1071}
1072
1073
1074
1075
1076
1077
1078std::pair<MachineBlockPlacement::WeightedEdge,
1079 MachineBlockPlacement::WeightedEdge>
1080MachineBlockPlacement::getBestNonConflictingEdges(
1081 const MachineBasicBlock *BB,
1083 Edges) {
1084
1085
1086
1087
1088
1089
1090
1091 auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
1092
1095 auto BestA = Edges[0].begin();
1096 auto BestB = Edges[1].begin();
1097
1098
1099 if (BestA->Src == BestB->Src) {
1100
1101 auto SecondBestA = std::next(BestA);
1102 auto SecondBestB = std::next(BestB);
1103 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
1104 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
1105 if (BestAScore < BestBScore)
1106 BestA = SecondBestA;
1107 else
1108 BestB = SecondBestB;
1109 }
1110
1111 if (BestB->Src == BB)
1113 return std::make_pair(*BestA, *BestB);
1114}
1115
1116
1117
1118
1119
1120
1121
1122
1123MachineBlockPlacement::BlockAndTailDupResult
1124MachineBlockPlacement::getBestTrellisSuccessor(
1125 const MachineBasicBlock *BB,
1126 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1127 BranchProbability AdjustedSumProb, const BlockChain &Chain,
1128 const BlockFilterSet *BlockFilter) {
1129
1130 BlockAndTailDupResult Result = {nullptr, false};
1131 SmallPtrSet<const MachineBasicBlock *, 4> Successors(llvm::from_range,
1133
1134
1135
1136
1137 if (Successors.size() != 2 || ViableSuccs.size() != 2)
1139
1140
1142 int SuccIndex = 0;
1143 for (auto *Succ : ViableSuccs) {
1144 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
1145
1146 if (SuccPred != BB) {
1147 if (BlockFilter && !BlockFilter->count(SuccPred))
1148 continue;
1149 const BlockChain *SuccPredChain = BlockToChain[SuccPred];
1150 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])
1151 continue;
1152 }
1153 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
1155 Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
1156 }
1157 ++SuccIndex;
1158 }
1159
1160
1161 WeightedEdge BestA, BestB;
1162 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1163
1164 if (BestA.Src != BB) {
1165
1166
1167
1168 LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
1170 }
1171
1172
1173
1174
1175 if (BestA.Dest == BestB.Src) {
1176
1177
1178 MachineBasicBlock *Succ1 = BestA.Dest;
1179 MachineBasicBlock *Succ2 = BestB.Dest;
1180
1181 if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ2) &&
1182 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1183 isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
1184 Chain, BlockFilter)) {
1188 << ", probability: " << Succ2Prob
1189 << " (Tail Duplicate)\n");
1191 Result.ShouldTailDup = true;
1193 }
1194 }
1195
1196
1197 ComputedEdges[BestB.Src] = {BestB.Dest, false};
1198
1199 auto TrellisSucc = BestA.Dest;
1203 << ", probability: " << SuccProb << " (Trellis)\n");
1204 Result.BB = TrellisSucc;
1206}
1207
1208
1209
1210
1211bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1212 const MachineBasicBlock *BB, MachineBasicBlock *Succ,
1213 const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
1214 if (!shouldTailDuplicate(Succ))
1215 return false;
1216
1217
1218 bool Duplicate = true;
1219
1220 unsigned int NumDup = 0;
1221
1222
1223 SmallPtrSet<const MachineBasicBlock *, 4> Successors(llvm::from_range,
1225 for (MachineBasicBlock *Pred : Succ->predecessors()) {
1226
1227
1228
1229 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1230 (BlockToChain[Pred] == &Chain && !Succ->succ_empty()))
1231 continue;
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267 continue;
1268 Duplicate = false;
1269 continue;
1270 }
1271 NumDup++;
1272 }
1273
1274
1275 if (NumDup == 0)
1276 return false;
1277
1278
1279
1280 if (F->getFunction().hasProfileData())
1281 return true;
1282
1283
1284
1285
1286
1288 return true;
1289
1290
1291 NumDup++;
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311 if ((NumDup > Succ->succ_size()) || !Duplicate)
1312 return false;
1313
1314 return true;
1315}
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334void MachineBlockPlacement::precomputeTriangleChains() {
1335 struct TriangleChain {
1336 std::vector<MachineBasicBlock *> Edges;
1337
1338 TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
1339 : Edges({src, dst}) {}
1340
1341 void append(MachineBasicBlock *dst) {
1342 assert(getKey()->isSuccessor(dst) &&
1343 "Attempting to append a block that is not a successor.");
1344 Edges.push_back(dst);
1345 }
1346
1347 unsigned count() const { return Edges.size() - 1; }
1348
1349 MachineBasicBlock *getKey() const { return Edges.back(); }
1350 };
1351
1353 return;
1354
1355 LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");
1356
1357
1358 DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
1359 for (MachineBasicBlock &BB : *F) {
1360
1362 continue;
1363 MachineBasicBlock *PDom = nullptr;
1364 for (MachineBasicBlock *Succ : BB.successors()) {
1365 if (!MPDT->dominates(Succ, &BB))
1366 continue;
1367 PDom = Succ;
1368 break;
1369 }
1370
1371
1372 if (PDom == nullptr)
1373 continue;
1374
1375 if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))
1376 continue;
1377
1378
1379 if (!shouldTailDuplicate(PDom))
1380 continue;
1381 bool CanTailDuplicate = true;
1382
1383
1384 for (MachineBasicBlock *Pred : PDom->predecessors()) {
1385 if (Pred == &BB)
1386 continue;
1388 CanTailDuplicate = false;
1389 break;
1390 }
1391 }
1392
1393
1394 if (!CanTailDuplicate)
1395 continue;
1396
1397
1398
1399
1400
1401 auto Found = TriangleChainMap.find(&BB);
1402
1403
1404 if (Found != TriangleChainMap.end()) {
1405 TriangleChain Chain = std::move(Found->second);
1406 TriangleChainMap.erase(Found);
1407 Chain.append(PDom);
1408 TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1409 } else {
1410 auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
1411 assert(InsertResult.second && "Block seen twice.");
1412 (void)InsertResult;
1413 }
1414 }
1415
1416
1417
1418
1419 for (auto &ChainPair : TriangleChainMap) {
1420 TriangleChain &Chain = ChainPair.second;
1421
1422
1423
1425 continue;
1426 MachineBasicBlock *dst = Chain.Edges.back();
1427 Chain.Edges.pop_back();
1428 for (MachineBasicBlock *src : reverse(Chain.Edges)) {
1431 << " as pre-computed based on triangles.\n");
1432
1433 auto InsertResult = ComputedEdges.insert({src, {dst, true}});
1434 assert(InsertResult.second && "Block seen twice.");
1435 (void)InsertResult;
1436
1437 dst = src;
1438 }
1439 }
1440}
1441
1442
1443
1444static BranchProbability
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1463 }
1464 }
1466}
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1477 const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
1478 const BlockChain &SuccChain, BranchProbability SuccProb,
1479 BranchProbability RealSuccProb, const BlockChain &Chain,
1480 const BlockFilterSet *BlockFilter) {
1481
1482
1483 if (SuccChain.UnscheduledPredecessors == 0)
1484 return false;
1485
1486
1487
1489 return false;
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1605
1606
1607
1608 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1609 bool BadCFGConflict = false;
1610
1611 for (MachineBasicBlock *Pred : Succ->predecessors()) {
1612 BlockChain *PredChain = BlockToChain[Pred];
1613 if (Pred == Succ || PredChain == &SuccChain ||
1614 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1615 Pred != *std::prev(PredChain->end()) ||
1616
1617
1618
1619 (Pred == BB))
1620 continue;
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634 BlockFrequency PredEdgeFreq =
1636 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
1637 BadCFGConflict = true;
1638 break;
1639 }
1640 }
1641
1642 if (BadCFGConflict) {
1644 << SuccProb << " (prob) (non-cold CFG conflict)\n");
1645 return true;
1646 }
1647
1648 return false;
1649}
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661MachineBlockPlacement::BlockAndTailDupResult
1662MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,
1663 const BlockChain &Chain,
1664 const BlockFilterSet *BlockFilter) {
1666
1667 BlockAndTailDupResult BestSucc = {nullptr, false};
1669
1670 SmallVector<MachineBasicBlock *, 4> Successors;
1671 auto AdjustedSumProb =
1672 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1673
1675 << "\n");
1676
1677
1678
1679 auto FoundEdge = ComputedEdges.find(BB);
1680 if (FoundEdge != ComputedEdges.end()) {
1681 MachineBasicBlock *Succ = FoundEdge->second.BB;
1682 ComputedEdges.erase(FoundEdge);
1683 BlockChain *SuccChain = BlockToChain[Succ];
1684 if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1685 SuccChain != &Chain && Succ == *SuccChain->begin())
1686 return FoundEdge->second;
1687 }
1688
1689
1690
1691 if (isTrellis(BB, Successors, Chain, BlockFilter))
1692 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1693 BlockFilter);
1694
1695
1696
1697
1699 DupCandidates;
1700 for (MachineBasicBlock *Succ : Successors) {
1702 BranchProbability SuccProb =
1704
1705 BlockChain &SuccChain = *BlockToChain[Succ];
1706
1707
1708 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1709 Chain, BlockFilter)) {
1710
1711 if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ))
1713 continue;
1714 }
1715
1718 << ", probability: " << SuccProb
1719 << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
1720 << "\n");
1721
1722 if (BestSucc.BB && BestProb >= SuccProb) {
1723 LLVM_DEBUG(dbgs() << " Not the best candidate, continuing\n");
1724 continue;
1725 }
1726
1727 LLVM_DEBUG(dbgs() << " Setting it as best candidate\n");
1728 BestSucc.BB = Succ;
1729 BestProb = SuccProb;
1730 }
1731
1732
1733
1734
1735
1737 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1738 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1739 return std::get<0>(L) > std::get<0>(R);
1740 });
1741 for (auto &Tup : DupCandidates) {
1742 BranchProbability DupProb;
1743 MachineBasicBlock *Succ;
1744 std::tie(DupProb, Succ) = Tup;
1745 if (DupProb < BestProb)
1746 break;
1747 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1748 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1750 << ", probability: " << DupProb
1751 << " (Tail Duplicate)\n");
1752 BestSucc.BB = Succ;
1753 BestSucc.ShouldTailDup = true;
1754 break;
1755 }
1756 }
1757
1758 if (BestSucc.BB)
1760
1761 return BestSucc;
1762}
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
1775 const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
1776
1777
1778
1779
1780 llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) {
1781 return BlockToChain.lookup(BB) == &Chain;
1782 });
1783
1784 if (WorkList.empty())
1785 return nullptr;
1786
1787 bool IsEHPad = WorkList[0]->isEHPad();
1788
1789 MachineBasicBlock *BestBlock = nullptr;
1790 BlockFrequency BestFreq;
1791 for (MachineBasicBlock *MBB : WorkList) {
1793 "EHPad mismatch between block and work list.");
1794
1795 BlockChain &SuccChain = *BlockToChain[MBB];
1796 if (&SuccChain == &Chain)
1797 continue;
1798
1799 assert(SuccChain.UnscheduledPredecessors == 0 &&
1800 "Found CFG-violating block");
1801
1802 BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
1805 << " (freq)\n");
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1826 continue;
1827
1828 BestBlock = MBB;
1829 BestFreq = CandidateFreq;
1830 }
1831
1832 return BestBlock;
1833}
1834
1835
1836
1837
1838
1839
1840
1841
1842MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1843 const BlockChain &PlacedChain,
1845
1847 ++I) {
1848 if (BlockChain *Chain = BlockToChain[&*I]; Chain != &PlacedChain) {
1849 PrevUnplacedBlockIt = I;
1850
1851
1852
1853 return *Chain->begin();
1854 }
1855 }
1856 return nullptr;
1857}
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1870 const BlockChain &PlacedChain,
1871 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1872 const BlockFilterSet *BlockFilter) {
1873 assert(BlockFilter);
1874 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1875 ++PrevUnplacedBlockInFilterIt) {
1876 BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1877 if (C != &PlacedChain) {
1878 return *C->begin();
1879 }
1880 }
1881 return nullptr;
1882}
1883
1884void MachineBlockPlacement::fillWorkLists(
1885 const MachineBasicBlock *MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
1886 const BlockFilterSet *BlockFilter = nullptr) {
1887 BlockChain &Chain = *BlockToChain[MBB];
1888 if (!UpdatedPreds.insert(&Chain).second)
1889 return;
1890
1892 Chain.UnscheduledPredecessors == 0 &&
1893 "Attempting to place block with unscheduled predecessors in worklist.");
1894 for (MachineBasicBlock *ChainBB : Chain) {
1895 assert(BlockToChain[ChainBB] == &Chain &&
1896 "Block in chain doesn't match BlockToChain map.");
1897 for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
1898 if (BlockFilter && !BlockFilter->count(Pred))
1899 continue;
1900 if (BlockToChain[Pred] == &Chain)
1901 continue;
1902 ++Chain.UnscheduledPredecessors;
1903 }
1904 }
1905
1906 if (Chain.UnscheduledPredecessors != 0)
1907 return;
1908
1909 MachineBasicBlock *BB = *Chain.begin();
1912 else
1914}
1915
1916void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
1917 BlockChain &Chain,
1918 BlockFilterSet *BlockFilter) {
1919 assert(HeadBB && "BB must not be null.\n");
1920 assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
1922 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1923 if (BlockFilter)
1924 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1925
1926 const MachineBasicBlock *LoopHeaderBB = HeadBB;
1927 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1928 MachineBasicBlock *BB = *std::prev(Chain.end());
1929 while (true) {
1930 assert(BB && "null block found at end of chain in loop.");
1931 assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
1932 assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
1933
1934
1935
1936 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1937 MachineBasicBlock *BestSucc = Result.BB;
1938 bool ShouldTailDup = Result.ShouldTailDup;
1939 if (allowTailDupPlacement(*F))
1940 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1941 BB, BestSucc, Chain, BlockFilter));
1942
1943
1944
1945
1946 if (!BestSucc)
1947 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1948 if (!BestSucc)
1949 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1950
1951 if (!BestSucc) {
1952 if (BlockFilter)
1953 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1954 BlockFilter);
1955 else
1956 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1957 if (!BestSucc)
1958 break;
1959
1960 LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
1961 "layout successor until the CFG reduces\n");
1962 }
1963
1964
1965
1966 if (allowTailDupPlacement(*F) && BestSucc && ShouldTailDup) {
1967 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1968 BlockFilter, PrevUnplacedBlockIt,
1969 PrevUnplacedBlockInFilterIt);
1970
1971
1973 continue;
1974 }
1975
1976
1977 BlockChain &SuccChain = *BlockToChain[BestSucc];
1978
1979
1980 SuccChain.UnscheduledPredecessors = 0;
1983 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1984 Chain.merge(BestSucc, &SuccChain);
1985 BB = *std::prev(Chain.end());
1986 }
1987
1988 LLVM_DEBUG(dbgs() << "Finished forming chain for header block "
1990}
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006bool MachineBlockPlacement::canMoveBottomBlockToTop(
2007 const MachineBasicBlock *BottomBlock, const MachineBasicBlock *OldTop) {
2008 if (BottomBlock->pred_size() != 1)
2009 return true;
2010 MachineBasicBlock *Pred = *BottomBlock->pred_begin();
2012 return true;
2013
2014 MachineBasicBlock *OtherBB = *Pred->succ_begin();
2015 if (OtherBB == BottomBlock)
2017 if (OtherBB == OldTop)
2018 return false;
2019
2020 return true;
2021}
2022
2023
2024BlockFrequency
2025MachineBlockPlacement::TopFallThroughFreq(const MachineBasicBlock *Top,
2026 const BlockFilterSet &LoopBlockSet) {
2027 BlockFrequency MaxFreq = BlockFrequency(0);
2028 for (MachineBasicBlock *Pred : Top->predecessors()) {
2029 BlockChain *PredChain = BlockToChain[Pred];
2030 if (!LoopBlockSet.count(Pred) &&
2031 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2032
2033
2035 bool TopOK = true;
2036 for (MachineBasicBlock *Succ : Pred->successors()) {
2038 BlockChain *SuccChain = BlockToChain[Succ];
2039
2040
2041 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
2042 (!SuccChain || Succ == *SuccChain->begin())) {
2043 TopOK = false;
2044 break;
2045 }
2046 }
2047 if (TopOK) {
2048 BlockFrequency EdgeFreq =
2050 if (EdgeFreq > MaxFreq)
2051 MaxFreq = EdgeFreq;
2052 }
2053 }
2054 }
2055 return MaxFreq;
2056}
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079BlockFrequency MachineBlockPlacement::FallThroughGains(
2080 const MachineBasicBlock *NewTop, const MachineBasicBlock *OldTop,
2081 const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) {
2082 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2083 BlockFrequency FallThrough2Exit = BlockFrequency(0);
2084 if (ExitBB)
2085 FallThrough2Exit =
2086 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, ExitBB);
2087 BlockFrequency BackEdgeFreq =
2088 MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, OldTop);
2089
2090
2091 MachineBasicBlock *BestPred = nullptr;
2092 BlockFrequency FallThroughFromPred = BlockFrequency(0);
2093 for (MachineBasicBlock *Pred : NewTop->predecessors()) {
2094 if (!LoopBlockSet.count(Pred))
2095 continue;
2096 BlockChain *PredChain = BlockToChain[Pred];
2097 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2098 BlockFrequency EdgeFreq =
2100 if (EdgeFreq > FallThroughFromPred) {
2101 FallThroughFromPred = EdgeFreq;
2102 BestPred = Pred;
2103 }
2104 }
2105 }
2106
2107
2108
2109 BlockFrequency NewFreq = BlockFrequency(0);
2110 if (BestPred) {
2111 for (MachineBasicBlock *Succ : BestPred->successors()) {
2112 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2113 continue;
2114 if (ComputedEdges.contains(Succ))
2115 continue;
2116 BlockChain *SuccChain = BlockToChain[Succ];
2117 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2118 (SuccChain == BlockToChain[BestPred]))
2119 continue;
2120 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
2122 if (EdgeFreq > NewFreq)
2123 NewFreq = EdgeFreq;
2124 }
2125 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
2127 if (NewFreq > OrigEdgeFreq) {
2128
2129
2130
2131 NewFreq = BlockFrequency(0);
2132 FallThroughFromPred = BlockFrequency(0);
2133 }
2134 }
2135
2136 BlockFrequency Result = BlockFrequency(0);
2137 BlockFrequency Gains = BackEdgeFreq + NewFreq;
2138 BlockFrequency Lost =
2139 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2140 if (Gains > Lost)
2141 Result = Gains - Lost;
2143}
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(
2168 MachineBasicBlock *OldTop, const MachineLoop &L,
2169 const BlockFilterSet &LoopBlockSet) {
2170
2171
2172
2173 BlockChain &HeaderChain = *BlockToChain[OldTop];
2174 if (!LoopBlockSet.count(*HeaderChain.begin()))
2175 return OldTop;
2176 if (OldTop != *HeaderChain.begin())
2177 return OldTop;
2178
2180 << "\n");
2181
2182 BlockFrequency BestGains = BlockFrequency(0);
2183 MachineBasicBlock *BestPred = nullptr;
2184 for (MachineBasicBlock *Pred : OldTop->predecessors()) {
2185 if (!LoopBlockSet.count(Pred))
2186 continue;
2187 if (Pred == L.getHeader())
2188 continue;
2190 << Pred->succ_size() << " successors, "
2191 << printBlockFreq(MBFI->getMBFI(), *Pred) << " freq\n");
2193 continue;
2194
2195 MachineBasicBlock *OtherBB = nullptr;
2198 if (OtherBB == OldTop)
2200 }
2201
2202 if (!canMoveBottomBlockToTop(Pred, OldTop))
2203 continue;
2204
2205 BlockFrequency Gains =
2206 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2207 if ((Gains > BlockFrequency(0)) &&
2208 (Gains > BestGains ||
2210 BestPred = Pred;
2211 BestGains = Gains;
2212 }
2213 }
2214
2215
2216 if (!BestPred) {
2218 return OldTop;
2219 }
2220
2221
2222 while (BestPred->pred_size() == 1 &&
2223 (*BestPred->pred_begin())->succ_size() == 1 &&
2224 *BestPred->pred_begin() != L.getHeader())
2226
2228 return BestPred;
2229}
2230
2231
2232
2233
2234
2235MachineBasicBlock *
2236MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
2237 const BlockFilterSet &LoopBlockSet) {
2238
2239
2240
2241
2242
2243
2244
2246 return L.getHeader();
2247
2248 MachineBasicBlock *OldTop = nullptr;
2249 MachineBasicBlock *NewTop = L.getHeader();
2250 while (NewTop != OldTop) {
2251 OldTop = NewTop;
2252 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2253 if (NewTop != OldTop)
2254 ComputedEdges[NewTop] = {OldTop, false};
2255 }
2256 return NewTop;
2257}
2258
2259
2260
2261
2262
2263
2264MachineBasicBlock *
2265MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
2266 const BlockFilterSet &LoopBlockSet,
2267 BlockFrequency &ExitFreq) {
2268
2269
2270
2271
2272
2273
2274
2275
2276 BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
2277 if (!LoopBlockSet.count(*HeaderChain.begin()))
2278 return nullptr;
2279
2280 BlockFrequency BestExitEdgeFreq;
2281 unsigned BestExitLoopDepth = 0;
2282 MachineBasicBlock *ExitingBB = nullptr;
2283
2284
2285
2286 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
2287
2290 for (MachineBasicBlock *MBB : L.getBlocks()) {
2291 BlockChain &Chain = *BlockToChain[MBB];
2292
2293
2294 if (MBB != *std::prev(Chain.end()))
2295 continue;
2296
2297
2298
2299
2300
2301 MachineBasicBlock *OldExitingBB = ExitingBB;
2302 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
2303 bool HasLoopingSucc = false;
2304 for (MachineBasicBlock *Succ : MBB->successors()) {
2306 continue;
2307 if (Succ == MBB)
2308 continue;
2309 BlockChain &SuccChain = *BlockToChain[Succ];
2310
2311 if (&Chain == &SuccChain) {
2313 << getBlockName(Succ) << " (chain conflict)\n");
2314 continue;
2315 }
2316
2318 if (LoopBlockSet.count(Succ)) {
2320 << getBlockName(Succ) << " (" << SuccProb << ")\n");
2321 HasLoopingSucc = true;
2322 continue;
2323 }
2324
2325 unsigned SuccLoopDepth = 0;
2326 if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
2327 SuccLoopDepth = ExitLoop->getLoopDepth();
2328 if (ExitLoop->contains(&L))
2329 BlocksExitingToOuterLoop.insert(MBB);
2330 }
2331
2332 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
2335 << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("
2336 << printBlockFreq(MBFI->getMBFI(), ExitEdgeFreq) << ")\n");
2337
2338
2339
2340
2341 BranchProbability Bias(100 - ExitBlockBias, 100);
2342 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2343 ExitEdgeFreq > BestExitEdgeFreq ||
2345 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2346 BestExitEdgeFreq = ExitEdgeFreq;
2347 ExitingBB = MBB;
2348 }
2349 }
2350
2351 if (!HasLoopingSucc) {
2352
2353 ExitingBB = OldExitingBB;
2354 BestExitEdgeFreq = OldBestExitEdgeFreq;
2355 }
2356 }
2357
2358
2359 if (!ExitingBB) {
2361 dbgs() << " No other candidate exit blocks, using loop header\n");
2362 return nullptr;
2363 }
2364 if (L.getNumBlocks() == 1) {
2365 LLVM_DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
2366 return nullptr;
2367 }
2368
2369
2370
2371
2372 if (!BlocksExitingToOuterLoop.empty() &&
2373 !BlocksExitingToOuterLoop.count(ExitingBB))
2374 return nullptr;
2375
2377 << "\n");
2378 ExitFreq = BestExitEdgeFreq;
2379 return ExitingBB;
2380}
2381
2382
2383
2384
2385
2386bool MachineBlockPlacement::hasViableTopFallthrough(
2387 const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) {
2388 for (MachineBasicBlock *Pred : Top->predecessors()) {
2389 BlockChain *PredChain = BlockToChain[Pred];
2390 if (!LoopBlockSet.count(Pred) &&
2391 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2392
2393
2395 bool TopOK = true;
2396 for (MachineBasicBlock *Succ : Pred->successors()) {
2398 BlockChain *SuccChain = BlockToChain[Succ];
2399
2400
2401 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2402 TopOK = false;
2403 break;
2404 }
2405 }
2406 if (TopOK)
2407 return true;
2408 }
2409 }
2410 return false;
2411}
2412
2413
2414
2415
2416
2417
2418
2419void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2420 const MachineBasicBlock *ExitingBB,
2421 BlockFrequency ExitFreq,
2422 const BlockFilterSet &LoopBlockSet) {
2423 if (!ExitingBB)
2424 return;
2425
2426 MachineBasicBlock *Top = *LoopChain.begin();
2427 MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
2428
2429
2430 if (Bottom == ExitingBB)
2431 return;
2432
2433
2435 return;
2436
2437 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2438
2439
2440
2441
2442 if (ViableTopFallthrough) {
2443 for (MachineBasicBlock *Succ : Bottom->successors()) {
2444 BlockChain *SuccChain = BlockToChain[Succ];
2445 if (!LoopBlockSet.count(Succ) &&
2446 (!SuccChain || Succ == *SuccChain->begin()))
2447 return;
2448 }
2449
2450
2451
2452 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2453 if (FallThrough2Top >= ExitFreq)
2454 return;
2455 }
2456
2457 BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
2458 if (ExitIt == LoopChain.end())
2459 return;
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480 if (ViableTopFallthrough) {
2481 assert(std::next(ExitIt) != LoopChain.end() &&
2482 "Exit should not be last BB");
2483 MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
2484 if (ExitingBB->isSuccessor(NextBlockInChain))
2486 return;
2487 }
2488
2490 << " at bottom\n");
2491 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2492}
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507void MachineBlockPlacement::rotateLoopWithProfile(
2508 BlockChain &LoopChain, const MachineLoop &L,
2509 const BlockFilterSet &LoopBlockSet) {
2510 auto RotationPos = LoopChain.end();
2511 MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
2512
2513
2515 return;
2516
2518
2519
2520
2521 auto ScaleBlockFrequency = [](BlockFrequency Freq,
2522 unsigned Scale) -> BlockFrequency {
2523 if (Scale == 0)
2524 return BlockFrequency(0);
2525
2526
2527 return Freq / BranchProbability(1, Scale);
2528 };
2529
2530
2531
2532
2533 BlockFrequency HeaderFallThroughCost(0);
2534 for (auto *Pred : ChainHeaderBB->predecessors()) {
2535 BlockChain *PredChain = BlockToChain[Pred];
2536 if (!LoopBlockSet.count(Pred) &&
2537 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2538 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2540 auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
2541
2542
2544 FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
2545 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2546 }
2547 }
2548
2549
2550
2551
2552
2554 for (auto *BB : LoopChain) {
2556 for (auto *Succ : BB->successors()) {
2557 BlockChain *SuccChain = BlockToChain[Succ];
2558 if (!LoopBlockSet.count(Succ) &&
2559 (!SuccChain || Succ == *SuccChain->begin())) {
2561 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2562 }
2563 }
2565 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2567 }
2568 }
2569
2570
2571
2572
2573 for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2574 EndIter = LoopChain.end();
2575 Iter != EndIter; Iter++, TailIter++) {
2576
2577
2578 if (TailIter == LoopChain.end())
2579 TailIter = LoopChain.begin();
2580
2581 auto TailBB = *TailIter;
2582
2583
2584 BlockFrequency Cost = BlockFrequency(0);
2585
2586
2587
2588
2589 if (Iter != LoopChain.begin())
2590 Cost += HeaderFallThroughCost;
2591
2592
2593
2594 for (auto &ExitWithFreq : ExitsWithFreq)
2595 if (TailBB != ExitWithFreq.first)
2596 Cost += ExitWithFreq.second;
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612 if (TailBB->isSuccessor(*Iter)) {
2613 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2614 if (TailBB->succ_size() == 1)
2616 else if (TailBB->succ_size() == 2) {
2618 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2619 auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
2620 ? TailBBFreq * TailToHeadProb.getCompl()
2621 : TailToHeadFreq;
2623 ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
2624 }
2625 }
2626
2627 LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "
2630
2631 if (Cost < SmallestRotationCost) {
2632 SmallestRotationCost = Cost;
2633 RotationPos = Iter;
2634 }
2635 }
2636
2637 if (RotationPos != LoopChain.end()) {
2639 << " to the top\n");
2640 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2641 }
2642}
2643
2644
2645
2646
2647
2648MachineBlockPlacement::BlockFilterSet
2649MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
2650
2651
2652 struct MBBCompare {
2653 bool operator()(const MachineBasicBlock *X,
2654 const MachineBasicBlock *Y) const {
2655 return X->getNumber() < Y->getNumber();
2656 }
2657 };
2658 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2670 BlockFrequency LoopFreq(0);
2671 for (auto *LoopPred : L.getHeader()->predecessors())
2672 if (.contains(LoopPred))
2673 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2675
2676 for (MachineBasicBlock *LoopBB : L.getBlocks()) {
2677 if (LoopBlockSet.count(LoopBB))
2678 continue;
2679 auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
2681 continue;
2682 BlockChain *Chain = BlockToChain[LoopBB];
2683 for (MachineBasicBlock *ChainBB : *Chain)
2684 LoopBlockSet.insert(ChainBB);
2685 }
2686 } else
2687 LoopBlockSet.insert(L.block_begin(), L.block_end());
2688
2689
2690
2691
2692 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2693 return Ret;
2694}
2695
2696
2697
2698
2699
2700
2701
2702void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2703
2704
2705 for (const MachineLoop *InnerLoop : L)
2706 buildLoopChains(*InnerLoop);
2707
2709 "BlockWorkList not empty when starting to build loop chains.");
2711 "EHPadWorkList not empty when starting to build loop chains.");
2712 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2713
2714
2715
2716
2717 bool RotateLoopWithProfile =
2720
2721
2722
2723
2724
2725 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
2726
2727
2728
2729
2730
2731
2732
2733 PreferredLoopExit = nullptr;
2734 BlockFrequency ExitFreq;
2735 if (!RotateLoopWithProfile && LoopTop == L.getHeader())
2736 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2737
2738 BlockChain &LoopChain = *BlockToChain[LoopTop];
2739
2740
2741
2742
2743 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2744 assert(LoopChain.UnscheduledPredecessors == 0 &&
2745 "LoopChain should not have unscheduled predecessors.");
2746 UpdatedPreds.insert(&LoopChain);
2747
2748 for (const MachineBasicBlock *LoopBB : LoopBlockSet)
2749 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2750
2751 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2752
2753 if (RotateLoopWithProfile)
2754 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2755 else
2756 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2757
2759
2760 bool BadLoop = false;
2761 if (LoopChain.UnscheduledPredecessors) {
2762 BadLoop = true;
2763 dbgs() << "Loop chain contains a block without its preds placed!\n"
2764 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
2765 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
2766 }
2767 for (MachineBasicBlock *ChainBB : LoopChain) {
2769 if (!LoopBlockSet.remove(ChainBB)) {
2770
2771
2772
2773 dbgs() << "Loop chain contains a block not contained by the loop!\n"
2774 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
2775 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
2776 << " Bad block: " << getBlockName(ChainBB) << "\n";
2777 }
2778 }
2779
2780 if (!LoopBlockSet.empty()) {
2781 BadLoop = true;
2782 for (const MachineBasicBlock *LoopBB : LoopBlockSet)
2783 dbgs() << "Loop contains blocks never placed into a chain!\n"
2784 << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
2785 << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
2786 << " Bad block: " << getBlockName(LoopBB) << "\n";
2787 }
2788 assert(!BadLoop && "Detected problems with the placement of this loop.");
2789 });
2790
2791 BlockWorkList.clear();
2792 EHPadWorkList.clear();
2793}
2794
2795void MachineBlockPlacement::buildCFGChains() {
2796
2797
2800 ++FI) {
2801 MachineBasicBlock *BB = &*FI;
2802 BlockChain *Chain =
2803 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
2804
2805
2806 while (true) {
2807 Cond.clear();
2808 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2810 break;
2811
2813 MachineBasicBlock *NextBB = &*NextFI;
2814
2815
2816 assert(NextFI != FE && "Can't fallthrough past the last block.");
2817 LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
2819 << "\n");
2820 Chain->merge(NextBB, nullptr);
2821#ifndef NDEBUG
2822 BlocksWithUnanalyzableExits.insert(&*BB);
2823#endif
2824 FI = NextFI;
2825 BB = NextBB;
2826 }
2827 }
2828
2829
2830 PreferredLoopExit = nullptr;
2831 for (MachineLoop *L : *MLI)
2832 buildLoopChains(*L);
2833
2835 "BlockWorkList should be empty before building final chain.");
2837 "EHPadWorkList should be empty before building final chain.");
2838
2839 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2840 for (MachineBasicBlock &MBB : *F)
2841 fillWorkLists(&MBB, UpdatedPreds);
2842
2843 BlockChain &FunctionChain = *BlockToChain[&F->front()];
2844 buildChain(&F->front(), FunctionChain);
2845
2846#ifndef NDEBUG
2847 using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
2848#endif
2850
2851 bool BadFunc = false;
2852 FunctionBlockSetType FunctionBlockSet;
2853 for (MachineBasicBlock &MBB : *F)
2854 FunctionBlockSet.insert(&MBB);
2855
2856 for (MachineBasicBlock *ChainBB : FunctionChain)
2857 if (!FunctionBlockSet.erase(ChainBB)) {
2858 BadFunc = true;
2859 dbgs() << "Function chain contains a block not in the function!\n"
2860 << " Bad block: " << getBlockName(ChainBB) << "\n";
2861 }
2862
2863 if (!FunctionBlockSet.empty()) {
2864 BadFunc = true;
2865 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2866 dbgs() << "Function contains blocks never placed into a chain!\n"
2867 << " Bad block: " << getBlockName(RemainingBB) << "\n";
2868 }
2869 assert(!BadFunc && "Detected problems with the block placement.");
2870 });
2871
2872
2873
2874 SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
2875 F->getNumBlockIDs());
2876 {
2877 MachineBasicBlock *LastMBB = nullptr;
2879 if (LastMBB != nullptr)
2880 OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB;
2881 LastMBB = &MBB;
2882 }
2883 OriginalLayoutSuccessors[F->back().getNumber()] = nullptr;
2884 }
2885
2886
2888 LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");
2889 for (MachineBasicBlock *ChainBB : FunctionChain) {
2890 LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
2891 : " ... ")
2894 F->splice(InsertPos, ChainBB);
2895 else
2896 ++InsertPos;
2897
2898
2899 if (ChainBB == *FunctionChain.begin())
2900 continue;
2902
2903
2904
2905
2906 Cond.clear();
2907 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2908
2909#ifndef NDEBUG
2910 if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
2911
2912
2913
2916 "Unexpected block with un-analyzable fallthrough!");
2917 Cond.clear();
2918 TBB = FBB = nullptr;
2919 }
2920#endif
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2945 }
2946 }
2947
2948
2949 Cond.clear();
2950 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2952 MachineBasicBlock *PrevBB = &F->back();
2954 }
2955
2956 BlockWorkList.clear();
2957 EHPadWorkList.clear();
2958}
2959
2960void MachineBlockPlacement::optimizeBranches() {
2961 BlockChain &FunctionChain = *BlockToChain[&F->front()];
2963
2964
2965
2966
2967
2968
2969
2970 for (MachineBasicBlock *ChainBB : FunctionChain) {
2971 Cond.clear();
2972 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2974 continue;
2976 continue;
2977
2978
2979
2981 continue;
2982
2983
2986 continue;
2988 continue;
2989 LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "
2992 << "\n");
2993 auto Dl = ChainBB->findBranchDebugLoc();
2996 }
2997}
2998
2999void MachineBlockPlacement::alignBlocks() {
3000
3001
3002
3003
3004
3006 if (F->getFunction().hasMinSize() ||
3008 return;
3009 }
3010
3011 BlockChain &FunctionChain = *BlockToChain[&F->front()];
3012
3013 if (FunctionChain.begin() == FunctionChain.end())
3014 return;
3015
3016 const BranchProbability ColdProb(1, 5);
3017 BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
3018 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
3019 for (MachineBasicBlock *ChainBB : FunctionChain) {
3020 if (ChainBB == *FunctionChain.begin())
3021 continue;
3022
3023
3024
3025
3026
3027 MachineLoop *L = MLI->getLoopFor(ChainBB);
3028 if (!L)
3029 continue;
3030
3032 unsigned MDAlign = 1;
3033 MDNode *LoopID = L->getLoopID();
3034 if (LoopID) {
3037 if (MD == nullptr)
3038 continue;
3040 if (S == nullptr)
3041 continue;
3042 if (S->getString() == "llvm.loop.align") {
3044 "per-loop align metadata should have two operands.");
3045 MDAlign =
3047 assert(MDAlign >= 1 && "per-loop align value must be positive.");
3048 }
3049 }
3050 }
3051
3052
3053 const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));
3054 if (LoopAlign == 1)
3055 continue;
3056
3057
3058
3059 BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
3060 if (Freq < WeightedEntryFreq)
3061 continue;
3062
3063
3064
3065 MachineBasicBlock *LoopHeader = L->getHeader();
3066 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
3067 if (Freq < (LoopHeaderFreq * ColdProb))
3068 continue;
3069
3070
3073 continue;
3074
3075
3076
3077 MachineBasicBlock *LayoutPred =
3079
3080 auto DetermineMaxAlignmentPadding = [&]() {
3081
3082 unsigned MaxBytes;
3085 else
3087 ChainBB->setMaxBytesForAlignment(MaxBytes);
3088 };
3089
3090
3091
3092 if (!LayoutPred->isSuccessor(ChainBB)) {
3093 ChainBB->setAlignment(LoopAlign);
3094 DetermineMaxAlignmentPadding();
3095 continue;
3096 }
3097
3098
3099
3100
3101
3102 BranchProbability LayoutProb =
3104 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3105 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3106 ChainBB->setAlignment(LoopAlign);
3107 DetermineMaxAlignmentPadding();
3108 }
3109 }
3110
3111 const bool HasMaxBytesOverride =
3113
3115
3116 for (MachineBasicBlock &MBB : *F) {
3117 if (HasMaxBytesOverride)
3120 else
3122 }
3124
3125
3126 for (auto MBI = std::next(F->begin()), MBE = F->end(); MBI != MBE; ++MBI) {
3127 auto LayoutPred = std::prev(MBI);
3129 if (HasMaxBytesOverride)
3132 else
3134 }
3135 }
3136 }
3137}
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3155 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
3156 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
3158 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3159 bool Removed, DuplicatedToLPred;
3160 bool DuplicatedToOriginalLPred;
3161 Removed = maybeTailDuplicateBlock(
3162 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3163 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3164 if (!Removed)
3165 return false;
3166 DuplicatedToOriginalLPred = DuplicatedToLPred;
3167
3168
3169
3170
3171 while (DuplicatedToLPred && Removed) {
3172 MachineBasicBlock *DupBB, *DupPred;
3173
3174
3175
3176
3177
3178 BlockChain::iterator ChainEnd = Chain.end();
3179 DupBB = *(--ChainEnd);
3180
3181 if (ChainEnd == Chain.begin())
3182 break;
3183 DupPred = *std::prev(ChainEnd);
3184 Removed = maybeTailDuplicateBlock(
3185 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3186 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3187 }
3188
3189
3190
3191
3192
3193 LPred = *std::prev(Chain.end());
3194 if (DuplicatedToOriginalLPred)
3195 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3196 return true;
3197}
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212bool MachineBlockPlacement::maybeTailDuplicateBlock(
3213 MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
3215 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3216 bool &DuplicatedToLPred) {
3217 DuplicatedToLPred = false;
3218 if (!shouldTailDuplicate(BB))
3219 return false;
3220
3222 << "\n");
3223
3224
3225
3226 bool Removed = false;
3227 auto RemovalCallback = [&](MachineBasicBlock *RemBB) {
3228
3229 Removed = true;
3230
3231
3232 if (auto It = BlockToChain.find(RemBB); It != BlockToChain.end()) {
3233 It->second->remove(RemBB);
3234 BlockToChain.erase(It);
3235 }
3236
3237
3238 if (&(*PrevUnplacedBlockIt) == RemBB) {
3239 PrevUnplacedBlockIt++;
3240 }
3241
3242
3243 if (RemBB->isEHPad()) {
3245 } else {
3247 }
3248
3249
3250 if (BlockFilter) {
3251 auto It = llvm::find(*BlockFilter, RemBB);
3252
3253
3254 if (It != BlockFilter->end()) {
3255 if (It < PrevUnplacedBlockInFilterIt) {
3256 const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
3257
3258
3259 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3260 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance;
3261 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3262 (void)PrevBB;
3263 } else if (It == PrevUnplacedBlockInFilterIt)
3264
3265
3266 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3267 else
3268 BlockFilter->erase(It);
3269 }
3270 }
3271
3272
3273 MLI->removeBlock(RemBB);
3274 if (RemBB == PreferredLoopExit)
3275 PreferredLoopExit = nullptr;
3276
3278 << "\n");
3279 };
3280 auto RemovalCallbackRef =
3281 function_ref<void(MachineBasicBlock *)>(RemovalCallback);
3282
3283 SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
3284 bool IsSimple = TailDup.isSimpleBB(BB);
3285 SmallVector<MachineBasicBlock *, 8> CandidatePreds;
3286 SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr;
3287 if (F->getFunction().hasProfileData()) {
3288
3289 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3290 if (CandidatePreds.size() == 0)
3291 return false;
3293 CandidatePtr = &CandidatePreds;
3294 }
3296 &RemovalCallbackRef, CandidatePtr);
3297
3298
3299 DuplicatedToLPred = false;
3300 for (MachineBasicBlock *Pred : DuplicatedPreds) {
3301
3302 BlockChain *PredChain = BlockToChain[Pred];
3303 if (Pred == LPred)
3304 DuplicatedToLPred = true;
3305 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3306 PredChain == &Chain)
3307 continue;
3308 for (MachineBasicBlock *NewSucc : Pred->successors()) {
3309 if (BlockFilter && !BlockFilter->count(NewSucc))
3310 continue;
3311 BlockChain *NewChain = BlockToChain[NewSucc];
3312 if (NewChain != &Chain && NewChain != PredChain)
3313 NewChain->UnscheduledPredecessors++;
3314 }
3315 }
3316 return Removed;
3317}
3318
3319
3323 if (.isPHI() &&
.isMetaInstruction())
3325 }
3327}
3328
3329
3330
3331
3332BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
3334}
3335
3336
3337bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3338 MachineBasicBlock *Pred,
3339 BlockFilterSet *BlockFilter) {
3340 if (BB == Pred)
3341 return false;
3342 if (BlockFilter && !BlockFilter->count(Pred))
3343 return false;
3344 BlockChain *PredChain = BlockToChain[Pred];
3345 if (PredChain && (Pred != *std::prev(PredChain->end())))
3346 return false;
3347
3348
3350 for (MachineBasicBlock *Succ : Pred->successors())
3351 if (Succ != BB) {
3352 if (BlockFilter && !BlockFilter->count(Succ))
3353 continue;
3354 BlockChain *SuccChain = BlockToChain[Succ];
3355 if (SuccChain && (Succ != *SuccChain->begin()))
3356 continue;
3358 if (SuccProb > BestProb)
3359 BestProb = SuccProb;
3360 }
3361
3363 if (BBProb <= BestProb)
3364 return false;
3365
3366
3367
3368 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3369 BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3370 return Gain > scaleThreshold(BB);
3371}
3372
3373
3374
3375void MachineBlockPlacement::findDuplicateCandidates(
3376 SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,
3377 BlockFilterSet *BlockFilter) {
3378 MachineBasicBlock *Fallthrough = nullptr;
3380 BlockFrequency BBDupThreshold(scaleThreshold(BB));
3381 SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors());
3382 SmallVector<MachineBasicBlock *, 8> Succs(BB->successors());
3383
3384
3385 auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
3387 };
3388 auto CmpPred = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
3389 return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B);
3390 };
3393
3394 auto SuccIt = Succs.begin();
3395 if (SuccIt != Succs.end()) {
3397 }
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440 for (MachineBasicBlock *Pred : Preds) {
3441 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3442
3444
3445
3446 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3447 Fallthrough = Pred;
3448 if (SuccIt != Succs.end())
3449 SuccIt++;
3450 }
3451 continue;
3452 }
3453
3454 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3455 BlockFrequency DupCost;
3456 if (SuccIt == Succs.end()) {
3457
3458 if (Succs.size() > 0)
3459 DupCost += PredFreq;
3460 } else {
3461
3462 DupCost += PredFreq;
3464 }
3465
3466 assert(OrigCost >= DupCost);
3467 OrigCost -= DupCost;
3468 if (OrigCost > BBDupThreshold) {
3470 if (SuccIt != Succs.end())
3471 SuccIt++;
3472 }
3473 }
3474
3475
3476
3477 if (!Fallthrough) {
3478 if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {
3479 Candidates[0] = Candidates.back();
3481 }
3482 }
3483}
3484
3485void MachineBlockPlacement::initTailDupThreshold() {
3486 DupThreshold = BlockFrequency(0);
3487 if (F->getFunction().hasProfileData()) {
3488
3491 UseProfileCount = true;
3492 DupThreshold =
3494 } else {
3495
3496 BlockFrequency MaxFreq = BlockFrequency(0);
3497 for (MachineBasicBlock &MBB : *F) {
3498 BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
3499 if (Freq > MaxFreq)
3500 MaxFreq = Freq;
3501 }
3502
3504 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
3505 UseProfileCount = false;
3506 }
3507 }
3508
3510
3514
3515
3516
3517 if (OptLevel >= CodeGenOptLevel::Aggressive) {
3518
3519
3520
3524 }
3525
3526
3527
3529 (OptLevel < CodeGenOptLevel::Aggressive ||
3532}
3533
3534PreservedAnalyses
3538 auto MBFI = std::make_unique(
3541 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
3543 : nullptr;
3545 .getCachedResult(
3547 if (!PSI)
3548 report_fatal_error("MachineBlockPlacement requires ProfileSummaryAnalysis",
3549 false);
3550
3551 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
3552 AllowTailMerge);
3553
3554 if (!MBP.run(MF))
3556
3558}
3559
3563 OS << MapClassName2PassName(name());
3564 if (!AllowTailMerge)
3565 OS << "";
3566}
3567
3569
3570
3571 if (std::next(MF.begin()) == MF.end())
3572 return false;
3573
3574 F = &MF;
3575 OptLevel = F->getTarget().getOptLevel();
3576
3579
3580
3581
3582 PreferredLoopExit = nullptr;
3583
3585 "BlockToChain map should be empty before starting placement.");
3587 "Computed Edge map should be empty before starting placement.");
3588
3589
3590 initTailDupThreshold();
3591
3592 const bool OptForSize =
3594
3595
3596
3597 bool UseExtTspForPerf = false;
3598 bool UseExtTspForSize = false;
3600 UseExtTspForPerf =
3604 }
3605
3606
3607 if (allowTailDupPlacement(*F)) {
3608 if (OptForSize)
3609 TailDupSize = 1;
3610 const bool PreRegAlloc = false;
3611 TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3612 true, TailDupSize);
3613 if (!UseExtTspForSize)
3614 precomputeTriangleChains();
3615 }
3616
3617
3618 if (!UseExtTspForSize)
3619 buildCFGChains();
3620
3621
3622
3623
3626 MF.size() > 3;
3627
3628 if (EnableTailMerge) {
3630 BranchFolder BF(true, false,
3632
3634 true)) {
3635
3636 if (MPDT)
3638 if (!UseExtTspForSize) {
3639
3640 BlockToChain.clear();
3641 ComputedEdges.clear();
3643 buildCFGChains();
3644 }
3645 }
3646 }
3647
3648
3649
3650
3651 if (UseExtTspForPerf || UseExtTspForSize) {
3653 !(UseExtTspForPerf && UseExtTspForSize) &&
3654 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3655 applyExtTsp(UseExtTspForSize);
3656 createCFGChainExtTsp();
3657 }
3658
3659 optimizeBranches();
3660 alignBlocks();
3661
3662 BlockToChain.clear();
3663 ComputedEdges.clear();
3665
3666
3672 MBFI->view("MBP." + MF.getName(), false);
3673 }
3674
3675
3676
3677 return true;
3678}
3679
3680void MachineBlockPlacement::applyExtTsp(bool OptForSize) {
3681
3682 DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
3683 BlockIndex.reserve(F->size());
3684 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3685 CurrentBlockOrder.reserve(F->size());
3686 size_t NumBlocks = 0;
3687 for (const MachineBasicBlock &MBB : *F) {
3688 BlockIndex[&MBB] = NumBlocks++;
3689 CurrentBlockOrder.push_back(&MBB);
3690 }
3691
3692 SmallVector<uint64_t, 0> BlockCounts(F->size());
3693 SmallVector<uint64_t, 0> BlockSizes(F->size());
3697 for (MachineBasicBlock &MBB : *F) {
3698
3699 BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
3700 BlockCounts[BlockIndex[&MBB]] = OptForSize ? 1 : BlockFreq.getFrequency();
3701
3702
3703
3704
3705
3706
3707 auto NonDbgInsts =
3709 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3710 BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;
3711
3712
3713 if (OptForSize) {
3714 Cond.clear();
3715 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
3717 continue;
3718
3720
3721
3722
3726 if (FBB && FBB != FTB)
3728 if (FTB)
3730
3731
3732
3733 const uint64_t Freq = Succs.size() == 1 ? 110 : 100;
3734 for (const MachineBasicBlock *Succ : Succs)
3735 JumpCounts.push_back({BlockIndex[&MBB], BlockIndex[Succ], Freq});
3736 } else {
3737 for (MachineBasicBlock *Succ : MBB.successors()) {
3739 BlockFrequency JumpFreq = BlockFreq * EP;
3741 {BlockIndex[&MBB], BlockIndex[Succ], JumpFreq.getFrequency()});
3742 }
3743 }
3744 }
3745
3746 LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()
3747 << " with profile = " << F->getFunction().hasProfileData()
3748 << " (" << F->getName() << ")" << "\n");
3749
3750 const double OrgScore = calcExtTspScore(BlockSizes, JumpCounts);
3752
3753
3754 auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
3755 std::vector<const MachineBasicBlock *> NewBlockOrder;
3756 NewBlockOrder.reserve(F->size());
3757 for (uint64_t Node : NewOrder) {
3758 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3759 }
3760 const double OptScore = calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3762
3763
3764 if (OptForSize && OrgScore > OptScore)
3765 assignBlockOrder(CurrentBlockOrder);
3766 else
3767 assignBlockOrder(NewBlockOrder);
3768}
3769
3770void MachineBlockPlacement::assignBlockOrder(
3771 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3772 assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");
3773 F->RenumberBlocks();
3774
3775
3776
3777
3778 MPDT = nullptr;
3779
3780 bool HasChanges = false;
3781 for (size_t I = 0; I < NewBlockOrder.size(); I++) {
3782 if (NewBlockOrder[I] != F->getBlockNumbered(I)) {
3783 HasChanges = true;
3784 break;
3785 }
3786 }
3787
3788 if (!HasChanges)
3789 return;
3790
3791 SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(F->getNumBlockIDs());
3794 }
3795
3796
3797 DenseMap<const MachineBasicBlock *, size_t> NewIndex;
3798 for (const MachineBasicBlock *MBB : NewBlockOrder) {
3799 NewIndex[MBB] = NewIndex.size();
3800 }
3801 F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
3802 return NewIndex[&L] < NewIndex[&R];
3803 });
3804
3805
3806
3807 const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
3812 auto *FTMBB = PrevFallThroughs[MBB.getNumber()];
3813
3814
3815
3816 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3818 }
3819
3820
3821 Cond.clear();
3822 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
3824 continue;
3826 }
3827}
3828
3829void MachineBlockPlacement::createCFGChainExtTsp() {
3830 BlockToChain.clear();
3831 ComputedEdges.clear();
3833
3834 MachineBasicBlock *HeadBB = &F->front();
3835 BlockChain *FunctionChain =
3836 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);
3837
3838 for (MachineBasicBlock &MBB : *F) {
3839 if (HeadBB == &MBB)
3840 continue;
3841 FunctionChain->merge(&MBB, nullptr);
3842 }
3843}
3844
3845namespace {
3846
3847
3848
3849
3850
3851
3852
3853class MachineBlockPlacementStats {
3854
3855 const MachineBranchProbabilityInfo *MBPI;
3856
3857
3858 const MachineBlockFrequencyInfo *MBFI;
3859
3860public:
3861 MachineBlockPlacementStats(const MachineBranchProbabilityInfo *MBPI,
3862 const MachineBlockFrequencyInfo *MBFI)
3863 : MBPI(MBPI), MBFI(MBFI) {}
3864 bool run(MachineFunction &MF);
3865};
3866
3867class MachineBlockPlacementStatsLegacy : public MachineFunctionPass {
3868public:
3869 static char ID;
3870
3871 MachineBlockPlacementStatsLegacy() : MachineFunctionPass(ID) {
3874 }
3875
3876 bool runOnMachineFunction(MachineFunction &F) override {
3877 auto *MBPI =
3878 &getAnalysis().getMBPI();
3879 auto *MBFI = &getAnalysis().getMBFI();
3880 return MachineBlockPlacementStats(MBPI, MBFI).run(F);
3881 }
3882
3883 void getAnalysisUsage(AnalysisUsage &AU) const override {
3884 AU.addRequired();
3885 AU.addRequired();
3888 }
3889};
3890
3891}
3892
3893char MachineBlockPlacementStatsLegacy::ID = 0;
3894
3896
3898 "Basic Block Placement Stats", false, false)
3903
3909
3910 MachineBlockPlacementStats(&MBPI, &MBFI).run(MF);
3912}
3913
3915
3916 if (std::next(F.begin()) == F.end())
3917 return false;
3918
3920 return false;
3921
3925 (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3927 (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
3929
3930 if (MBB.isLayoutSuccessor(Succ))
3931 continue;
3932
3935 ++NumBranches;
3937 }
3938 }
3939
3940 return false;
3941}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
Definition MachineBlockPlacement.cpp:810
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
Definition MachineBlockPlacement.cpp:1445
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
Definition MachineBlockPlacement.cpp:858
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
Definition MachineBlockPlacement.cpp:698
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
Definition MachineBlockPlacement.cpp:825
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
Definition MachineBlockPlacement.cpp:3320
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Module * getParent()
Get the module that this global value is contained inside of...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition MachineBlockPlacement.cpp:3535
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
Definition MachineBlockPlacement.cpp:3560
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition MachineBlockPlacement.cpp:3905
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
StringRef - Represent a constant reference to a string, i.e.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
virtual unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const
Returns the target-specific default value for tail duplication.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
cl::opt< bool > ApplyExtTspWithoutProfile
constexpr from_range_t from_range
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
Definition MachineBlockPlacement.cpp:242
bool isFunctionInPrintList(StringRef FunctionName)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI void initializeMachineBlockPlacementLegacyPass(PassRegistry &)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > StaticLikelyProb
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
Definition MachineBlockPlacement.cpp:682
cl::opt< bool > EnableExtTspBlockPlacement
LLVM_ABI char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
Definition MachineBlockPlacement.cpp:3895
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.