LLVM: lib/Transforms/Utils/SampleProfileInference.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
20#include
21#include
22#include
23#include <unordered_set>
24
25using namespace llvm;
26#define DEBUG_TYPE "sample-profile-inference"
27
28namespace {
29
30static cl::opt SampleProfileEvenFlowDistribution(
32 cl::desc("Try to evenly distribute flow when there are multiple equally "
33 "likely options."));
34
35static cl::opt SampleProfileRebalanceUnknown(
37 cl::desc("Evenly re-distribute flow among unknown subgraphs."));
38
41 cl::desc("Join isolated components having positive flow."));
42
45 cl::desc("The cost of increasing a block's count by one."));
46
49 cl::desc("The cost of decreasing a block's count by one."));
50
52 "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
53 cl::desc("The cost of increasing the entry block's count by one."));
54
56 "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
57 cl::desc("The cost of decreasing the entry block's count by one."));
58
61 cl::desc("The cost of increasing a count of zero-weight block by one."));
62
64 "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
65 cl::desc("The cost of increasing an unknown block's count by one."));
66
67
68
69
70static constexpr int64_t INF = ((int64_t)1) << 50;
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86class MinCostMaxFlow {
87public:
88 MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
89
90
92 Source = SourceNode;
94
95 Nodes = std::vector(NodeCount);
96 Edges = std::vector<std::vector>(NodeCount, std::vector());
98 AugmentingEdges =
99 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
100 }
101
102
103 int64_t run() {
104 LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
105
106
107
108 size_t AugmentationIters = applyFlowAugmentation();
109
110
111 int64_t TotalCost = 0;
112 int64_t TotalFlow = 0;
113 for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
114 for (auto &Edge : Edges[Src]) {
115 if (Edge.Flow > 0) {
116 TotalCost += Edge.Cost * Edge.Flow;
117 if (Src == Source)
118 TotalFlow += Edge.Flow;
119 }
120 }
121 }
122 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
123 << " iterations with " << TotalFlow << " total flow"
124 << " of " << TotalCost << " cost\n");
125 (void)TotalFlow;
126 (void)AugmentationIters;
127 return TotalCost;
128 }
129
130
131
132
134 assert(Capacity > 0 && "adding an edge of zero capacity");
135 assert(Src != Dst && "loop edge are not supported");
136
137 Edge SrcEdge;
138 SrcEdge.Dst = Dst;
139 SrcEdge.Cost = Cost;
140 SrcEdge.Capacity = Capacity;
141 SrcEdge.Flow = 0;
142 SrcEdge.RevEdgeIndex = Edges[Dst].size();
143
144 Edge DstEdge;
145 DstEdge.Dst = Src;
146 DstEdge.Cost = -Cost;
147 DstEdge.Capacity = 0;
148 DstEdge.Flow = 0;
149 DstEdge.RevEdgeIndex = Edges[Src].size();
150
151 Edges[Src].push_back(SrcEdge);
152 Edges[Dst].push_back(DstEdge);
153 }
154
155
158 }
159
160
161
162 std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
163 std::vector<std::pair<uint64_t, int64_t>> Flow;
164 for (const auto &Edge : Edges[Src]) {
165 if (Edge.Flow > 0)
166 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
167 }
169 }
170
171
173 int64_t Flow = 0;
174 for (const auto &Edge : Edges[Src]) {
175 if (Edge.Dst == Dst) {
176 Flow += Edge.Flow;
177 }
178 }
180 }
181
182private:
183
184
185 size_t applyFlowAugmentation() {
186 size_t AugmentationIters = 0;
187 while (findAugmentingPath()) {
188 uint64_t PathCapacity = computeAugmentingPathCapacity();
189 while (PathCapacity > 0) {
190 bool Progress = false;
192
193 identifyShortestEdges(PathCapacity);
194
195
196 auto AugmentingOrder = findAugmentingDAG();
197
198
199 Progress = augmentFlowAlongDAG(AugmentingOrder);
200 PathCapacity = computeAugmentingPathCapacity();
201 }
202
203 if (!Progress) {
204 augmentFlowAlongPath(PathCapacity);
205 PathCapacity = 0;
206 }
207
208 AugmentationIters++;
209 }
210 }
211 return AugmentationIters;
212 }
213
214
215
216 uint64_t computeAugmentingPathCapacity() {
219 while (Now != Source) {
220 uint64_t Pred = Nodes[Now].ParentNode;
221 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
222
223 assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
225 PathCapacity = std::min(PathCapacity, EdgeCapacity);
226
227 Now = Pred;
228 }
229 return PathCapacity;
230 }
231
232
233 bool findAugmentingPath() {
234
235 for (auto &Node : Nodes) {
239 Node.Taken = false;
240 }
241
242 std::queue<uint64_t> Queue;
243 Queue.push(Source);
244 Nodes[Source].Distance = 0;
245 Nodes[Source].Taken = true;
246 while (!Queue.empty()) {
247 uint64_t Src = Queue.front();
248 Queue.pop();
249 Nodes[Src].Taken = false;
250
251
252
253
254
255
256
257
258
259
260
261
262
263
265 break;
266 if (Nodes[Src].Distance > Nodes[Target].Distance)
267 continue;
268
269
270 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
271 auto &Edge = Edges[Src][EdgeIdx];
272 if (Edge.Flow < Edge.Capacity) {
274 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
275 if (Nodes[Dst].Distance > NewDistance) {
276
277 Nodes[Dst].Distance = NewDistance;
278 Nodes[Dst].ParentNode = Src;
279 Nodes[Dst].ParentEdgeIndex = EdgeIdx;
280
281 if (!Nodes[Dst].Taken) {
282 Queue.push(Dst);
283 Nodes[Dst].Taken = true;
284 }
285 }
286 }
287 }
288 }
289
290 return Nodes[Target].Distance != INF;
291 }
292
293
294 void augmentFlowAlongPath(uint64_t PathCapacity) {
295 assert(PathCapacity > 0 && "found an incorrect augmenting path");
297 while (Now != Source) {
298 uint64_t Pred = Nodes[Now].ParentNode;
299 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
300 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
301
302 Edge.Flow += PathCapacity;
303 RevEdge.Flow -= PathCapacity;
304
305 Now = Pred;
306 }
307 }
308
309
310
311
312
313
314
315
316 std::vector<uint64_t> findAugmentingDAG() {
317
318
319
320
321
322 typedef std::pair<uint64_t, uint64_t> StackItemType;
323 std::stack Stack;
324 std::vector<uint64_t> AugmentingOrder;
325
326
327 for (auto &Node : Nodes) {
328 Node.Discovery = 0;
329 Node.Finish = 0;
330 Node.NumCalls = 0;
331 Node.Taken = false;
332 }
334
335
336 Nodes[Target].Taken = true;
337
338
339 Stack.emplace(Source, 0);
340 Nodes[Source].Discovery = ++Time;
341 while (!Stack.empty()) {
342 auto NodeIdx = Stack.top().first;
343 auto EdgeIdx = Stack.top().second;
344
345
346 if (EdgeIdx < Edges[NodeIdx].size()) {
347 auto &Edge = Edges[NodeIdx][EdgeIdx];
348 auto &Dst = Nodes[Edge.Dst];
349 Stack.top().second++;
350
351 if (Edge.OnShortestPath) {
352
353 if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
354 Dst.Discovery = ++Time;
355 Stack.emplace(Edge.Dst, 0);
356 Dst.NumCalls++;
357 } else if (Dst.Taken && Dst.Finish != 0) {
358
359 Nodes[NodeIdx].Taken = true;
360 }
361 }
362 } else {
363
364 Stack.pop();
365
366 if (!Nodes[NodeIdx].Taken) {
367 Nodes[NodeIdx].Discovery = 0;
368 } else {
369
370
371 Nodes[NodeIdx].Finish = ++Time;
372
373 if (NodeIdx != Source) {
374 assert(!Stack.empty() && "empty stack while running dfs");
375 Nodes[Stack.top().first].Taken = true;
376 }
377 AugmentingOrder.push_back(NodeIdx);
378 }
379 }
380 }
381
382 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
383
384
385 for (size_t Src : AugmentingOrder) {
386 AugmentingEdges[Src].clear();
387 for (auto &Edge : Edges[Src]) {
389 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
390 Nodes[Dst].Finish < Nodes[Src].Finish) {
391 AugmentingEdges[Src].push_back(&Edge);
392 }
393 }
394 assert((Src == Target || !AugmentingEdges[Src].empty()) &&
395 "incorrectly constructed augmenting edges");
396 }
397
398 return AugmentingOrder;
399 }
400
401
402
403
404
405 bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
406
407 for (uint64_t Src : AugmentingOrder) {
408 Nodes[Src].FracFlow = 0;
409 Nodes[Src].IntFlow = 0;
410 for (auto &Edge : AugmentingEdges[Src]) {
411 Edge->AugmentedFlow = 0;
412 }
413 }
414
415
417 Nodes[Source].FracFlow = 1.0;
418 for (uint64_t Src : AugmentingOrder) {
419 assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
420 "incorrectly computed fractional flow");
421
422 uint64_t Degree = AugmentingEdges[Src].size();
423 for (auto &Edge : AugmentingEdges[Src]) {
424 double EdgeFlow = Nodes[Src].FracFlow / Degree;
425 Nodes[Edge->Dst].FracFlow += EdgeFlow;
426 if (Edge->Capacity == INF)
427 continue;
428 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
429 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
430 }
431 }
432
433 if (MaxFlowAmount == 0)
434 return false;
435
436
437 Nodes[Source].IntFlow = MaxFlowAmount;
438 for (uint64_t Src : AugmentingOrder) {
440 break;
441
442
443 uint64_t Degree = AugmentingEdges[Src].size();
444
445 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
446 for (auto &Edge : AugmentingEdges[Src]) {
448 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
449 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
450 Nodes[Dst].IntFlow += EdgeFlow;
451 Nodes[Src].IntFlow -= EdgeFlow;
452 Edge->AugmentedFlow += EdgeFlow;
453 }
454 }
455 assert(Nodes[Target].IntFlow <= MaxFlowAmount);
456 Nodes[Target].IntFlow = 0;
457
458
459
460
461 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
462 uint64_t Src = AugmentingOrder[Idx - 1];
463
464
465 for (auto &Edge : AugmentingEdges[Src]) {
467 if (Nodes[Dst].IntFlow == 0)
468 continue;
469 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
470 Nodes[Dst].IntFlow -= EdgeFlow;
471 Nodes[Src].IntFlow += EdgeFlow;
472 Edge->AugmentedFlow -= EdgeFlow;
473 }
474 }
475
476
477 bool HasSaturatedEdges = false;
478 for (uint64_t Src : AugmentingOrder) {
479
480 assert(Src == Source || Nodes[Src].IntFlow == 0);
481 for (auto &Edge : AugmentingEdges[Src]) {
482 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
483
484 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
485 Edge->Flow += Edge->AugmentedFlow;
486 RevEdge.Flow -= Edge->AugmentedFlow;
487 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
488 HasSaturatedEdges = true;
489 }
490 }
491
492
493 return HasSaturatedEdges;
494 }
495
496
497 void identifyShortestEdges(uint64_t PathCapacity) {
498 assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
499
500
501
502
503 uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
504
505
506 for (size_t Src = 0; Src < Nodes.size(); Src++) {
507
508 if (Nodes[Src].Distance > Nodes[Target].Distance)
509 continue;
510
511 for (auto &Edge : Edges[Src]) {
513 Edge.OnShortestPath =
514 Src != Target && Dst != Source &&
515 Nodes[Dst].Distance <= Nodes[Target].Distance &&
516 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
517 Edge.Capacity > Edge.Flow &&
518 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
519 }
520 }
521 }
522
523
524 static constexpr uint64_t MaxDfsCalls = 10;
525
526
527 struct Node {
528
529 int64_t Distance;
530
532
534
535 bool Taken;
536
537
538
539 double FracFlow;
540
542
544
546
548 };
549
550
551 struct Edge {
552
553 int64_t Cost;
554
555 int64_t Capacity;
556
557 int64_t Flow;
558
560
562
563
564
565 bool OnShortestPath;
566
568 };
569
570
571 std::vector Nodes;
572
573 std::vector<std::vector> Edges;
574
576
578
579 std::vector<std::vector<Edge *>> AugmentingEdges;
580
582};
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600class FlowAdjuster {
601public:
603 : Params(Params), Func(Func) {}
604
605
606 void run() {
608
609 joinIsolatedComponents();
610 }
611
613
614 rebalanceUnknownSubgraphs();
615 }
616 }
617
618private:
619 void joinIsolatedComponents() {
620
621 auto Visited = BitVector(NumBlocks(), false);
622 findReachable(Func.Entry, Visited);
623
624
625 for (uint64_t I = 0; I < NumBlocks(); I++) {
626 auto &Block = Func.Blocks[I];
627 if (Block.Flow > 0 && !Visited[I]) {
628
629 auto Path = findShortestPath(I);
630
631 assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
632 "incorrectly computed path adjusting control flow");
633 Func.Blocks[Func.Entry].Flow += 1;
634 for (auto &Jump : Path) {
635 Jump->Flow += 1;
636 Func.Blocks[Jump->Target].Flow += 1;
637
638 findReachable(Jump->Target, Visited);
639 }
640 }
641 }
642 }
643
644
645
647 if (Visited[Src])
648 return;
649 std::queue<uint64_t> Queue;
650 Queue.push(Src);
651 Visited[Src] = true;
652 while (!Queue.empty()) {
653 Src = Queue.front();
654 Queue.pop();
655 for (auto *Jump : Func.Blocks[Src].SuccJumps) {
657 if (Jump->Flow > 0 && !Visited[Dst]) {
658 Queue.push(Dst);
659 Visited[Dst] = true;
660 }
661 }
662 }
663 }
664
665
666
667 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
668
669 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
670
671 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
672
673
674 std::vector<FlowJump *> Result;
677 return Result;
678 }
679
680
681
682
684
685 if (Source == Target)
686 return std::vector<FlowJump *>();
687 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
688 return std::vector<FlowJump *>();
689
690
691 auto Distance = std::vector<int64_t>(NumBlocks(), INF);
692 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
693 Distance[Source] = 0;
694 std::set<std::pair<uint64_t, uint64_t>> Queue;
695 Queue.insert(std::make_pair(Distance[Source], Source));
696
697
698 while (!Queue.empty()) {
699 uint64_t Src = Queue.begin()->second;
700 Queue.erase(Queue.begin());
701
703 (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
704 break;
705
706 for (auto *Jump : Func.Blocks[Src].SuccJumps) {
708 int64_t JumpDist = jumpDistance(Jump);
709 if (Distance[Dst] > Distance[Src] + JumpDist) {
710 Queue.erase(std::make_pair(Distance[Dst], Dst));
711
712 Distance[Dst] = Distance[Src] + JumpDist;
713 Parent[Dst] = Jump;
714
715 Queue.insert(std::make_pair(Distance[Dst], Dst));
716 }
717 }
718 }
719
720 if (Target == AnyExitBlock) {
721 for (uint64_t I = 0; I < NumBlocks(); I++) {
722 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
723 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
725 }
726 }
727 }
728 }
729 assert(Parent[Target] != nullptr && "a path does not exist");
730
731
732 std::vector<FlowJump *> Result;
734 while (Now != Source) {
735 assert(Now == Parent[Now]->Target && "incorrect parent jump");
736 Result.push_back(Parent[Now]);
737 Now = Parent[Now]->Source;
738 }
739
740 std::reverse(Result.begin(), Result.end());
741 return Result;
742 }
743
744
745
746
747
748
749
750
751
752
753 int64_t jumpDistance(FlowJump *Jump) const {
757 std::max(FlowAdjuster::MinBaseDistance,
758 std::min(Func.Blocks[Func.Entry].Flow,
759 Params.CostUnlikely / (2 * (NumBlocks() + 1))));
760 if (Jump->Flow > 0)
761 return BaseDistance + BaseDistance / Jump->Flow;
762 return 2 * BaseDistance * (NumBlocks() + 1);
763 };
764
765 uint64_t NumBlocks() const { return Func.Blocks.size(); }
766
767
768
769
770
771 void rebalanceUnknownSubgraphs() {
772
773 for (const FlowBlock &SrcBlock : Func.Blocks) {
774
775 if (!canRebalanceAtRoot(&SrcBlock))
776 continue;
777
778
779
780 std::vector<FlowBlock *> UnknownBlocks;
781 std::vector<FlowBlock *> KnownDstBlocks;
782 findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
783
784
785
787 if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
788 DstBlock))
789 continue;
790
791
792 if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
793 continue;
794
795
796 rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
797 }
798 }
799
800
801 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
802
803
805 return false;
806
807
808 bool HasUnknownSuccs = false;
809 for (auto *Jump : SrcBlock->SuccJumps) {
810 if (Func.Blocks[Jump->Target].HasUnknownWeight) {
811 HasUnknownSuccs = true;
812 break;
813 }
814 }
815 if (!HasUnknownSuccs)
816 return false;
817
818 return true;
819 }
820
821
822
823 void findUnknownSubgraph(const FlowBlock *SrcBlock,
824 std::vector<FlowBlock *> &KnownDstBlocks,
825 std::vector<FlowBlock *> &UnknownBlocks) {
826
827
828 auto Visited = BitVector(NumBlocks(), false);
829 std::queue<uint64_t> Queue;
830
831 Queue.push(SrcBlock->Index);
832 Visited[SrcBlock->Index] = true;
833 while (!Queue.empty()) {
834 auto &Block = Func.Blocks[Queue.front()];
835 Queue.pop();
836
837 for (auto *Jump : Block.SuccJumps) {
838
839 if (ignoreJump(SrcBlock, nullptr, Jump))
840 continue;
841
843
844 if (Visited[Dst])
845 continue;
846
847 Visited[Dst] = true;
848 if (!Func.Blocks[Dst].HasUnknownWeight) {
849 KnownDstBlocks.push_back(&Func.Blocks[Dst]);
850 } else {
851 Queue.push(Dst);
852 UnknownBlocks.push_back(&Func.Blocks[Dst]);
853 }
854 }
855 }
856 }
857
858
859
860 bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
861 const std::vector<FlowBlock *> &KnownDstBlocks,
862 const std::vector<FlowBlock *> &UnknownBlocks,
864
865 if (UnknownBlocks.empty())
866 return false;
867
868
869 if (KnownDstBlocks.size() > 1)
870 return false;
871 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
872
873
874 for (auto *Block : UnknownBlocks) {
875 if (Block->SuccJumps.empty()) {
876
877 if (DstBlock != nullptr)
878 return false;
879 continue;
880 }
881 size_t NumIgnoredJumps = 0;
882 for (auto *Jump : Block->SuccJumps) {
883 if (ignoreJump(SrcBlock, DstBlock, Jump))
884 NumIgnoredJumps++;
885 }
886
887
888 if (NumIgnoredJumps == Block->SuccJumps.size())
889 return false;
890 }
891
892 return true;
893 }
894
895
896
897 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
899
901 return true;
902
903 auto JumpSource = &Func.Blocks[Jump->Source];
904 auto JumpTarget = &Func.Blocks[Jump->Target];
905
906
907 if (DstBlock != nullptr && JumpTarget == DstBlock)
908 return false;
909
910
911 if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
912 return true;
913
914
915 if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
916 return true;
917
918 return false;
919 }
920
921
922
923 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
924 std::vector<FlowBlock *> &UnknownBlocks) {
925
926 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
928 for (auto *Jump : Block->SuccJumps) {
929 if (ignoreJump(SrcBlock, DstBlock, Jump))
930 continue;
931 LocalInDegree[Jump->Target]++;
932 }
933 };
934 fillInDegree(SrcBlock);
935 for (auto *Block : UnknownBlocks) {
936 fillInDegree(Block);
937 }
938
939 if (LocalInDegree[SrcBlock->Index] > 0)
940 return false;
941
942 std::vector<FlowBlock *> AcyclicOrder;
943 std::queue<uint64_t> Queue;
944 Queue.push(SrcBlock->Index);
945 while (!Queue.empty()) {
947 Queue.pop();
948
949 if (DstBlock != nullptr && Block == DstBlock)
950 break;
951
952
953 if (Block->HasUnknownWeight && Block != SrcBlock)
954 AcyclicOrder.push_back(Block);
955
956
957 for (auto *Jump : Block->SuccJumps) {
958 if (ignoreJump(SrcBlock, DstBlock, Jump))
959 continue;
961 LocalInDegree[Dst]--;
962 if (LocalInDegree[Dst] == 0) {
963 Queue.push(Dst);
964 }
965 }
966 }
967
968
969
970 if (UnknownBlocks.size() != AcyclicOrder.size())
971 return false;
972 UnknownBlocks = AcyclicOrder;
973 return true;
974 }
975
976
977
978 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
980 const std::vector<FlowBlock *> &UnknownBlocks) {
981 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
982
983
985
986 for (auto *Jump : SrcBlock->SuccJumps) {
987 if (ignoreJump(SrcBlock, DstBlock, Jump))
988 continue;
989 BlockFlow += Jump->Flow;
990 }
991 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
992
993
994 for (auto *Block : UnknownBlocks) {
995 assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
997
998 for (auto *Jump : Block->PredJumps) {
999 BlockFlow += Jump->Flow;
1000 }
1001 Block->Flow = BlockFlow;
1002 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
1003 }
1004 }
1005
1006
1007
1008 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
1010
1011 size_t BlockDegree = 0;
1012 for (auto *Jump : Block->SuccJumps) {
1013 if (ignoreJump(SrcBlock, DstBlock, Jump))
1014 continue;
1015 BlockDegree++;
1016 }
1017
1018 if (DstBlock == nullptr && BlockDegree == 0)
1019 return;
1020 assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1021
1022
1023
1024 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1025 for (auto *Jump : Block->SuccJumps) {
1026 if (ignoreJump(SrcBlock, DstBlock, Jump))
1027 continue;
1028 uint64_t Flow = std::min(SuccFlow, BlockFlow);
1030 BlockFlow -= Flow;
1031 }
1032 assert(BlockFlow == 0 && "not all flow is propagated");
1033 }
1034
1035
1037
1038 static constexpr uint64_t MinBaseDistance = 10000;
1039
1040
1042
1044};
1045
1046std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1048std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1050
1051
1052
1053
1054
1055
1056void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
1058 uint64_t NumBlocks = Func.Blocks.size();
1059 assert(NumBlocks > 1 && "Too few blocks in a function");
1060 uint64_t NumJumps = Func.Jumps.size();
1061 assert(NumJumps > 0 && "Too few jumps in a function");
1062
1063
1064
1065
1066
1071
1072 Network.initialize(2 * NumBlocks + 4, S1, T1);
1073
1074
1075 for (uint64_t B = 0; B < NumBlocks; B++) {
1076 auto &Block = Func.Blocks[B];
1077
1078
1079
1082
1083
1084 if (Block.isEntry()) {
1085 Network.addEdge(S, Bin, 0);
1086 } else if (Block.isExit()) {
1087 Network.addEdge(Bout, T, 0);
1088 }
1089
1090
1091 auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
1092
1093
1094 Network.addEdge(Bin, Bout, AuxCostInc);
1095 if (Block.Weight > 0) {
1096 Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
1097 Network.addEdge(S1, Bout, Block.Weight, 0);
1098 Network.addEdge(Bin, T1, Block.Weight, 0);
1099 }
1100 }
1101
1102
1103 for (uint64_t J = 0; J < NumJumps; J++) {
1104 auto &Jump = Func.Jumps[J];
1105
1106
1109
1110
1111 auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
1112
1113
1114 Network.addEdge(Jin, Jout, AuxCostInc);
1115 if (Jump.Weight > 0) {
1116 Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
1117 Network.addEdge(S1, Jout, Jump.Weight, 0);
1118 Network.addEdge(Jin, T1, Jump.Weight, 0);
1119 }
1120 }
1121
1122
1123 Network.addEdge(T, S, 0);
1124}
1125
1126
1127std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1129
1130 if (Block.IsUnlikely)
1132
1133
1136
1137 if (Block.HasUnknownWeight) {
1139 CostDec = 0;
1140 } else {
1141
1142
1143 if (Block.Weight == 0)
1145
1146 if (Block.isEntry()) {
1149 }
1150 }
1151 return std::make_pair(CostInc, CostDec);
1152}
1153
1154
1155std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1157
1160
1161
1164
1166
1169 }
1171
1174 else
1176 CostDec = 0;
1177 }
1178 return std::make_pair(CostInc, CostDec);
1179}
1180
1181
1182void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
1184 uint64_t NumBlocks = Func.Blocks.size();
1185 uint64_t NumJumps = Func.Jumps.size();
1186
1187
1188 for (uint64_t J = 0; J < NumJumps; J++) {
1189 auto &Jump = Func.Jumps[J];
1192
1193 int64_t Flow = 0;
1194 int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
1196 Flow = int64_t(Jump.Weight) + AuxFlow;
1197 else
1198 Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
1199
1201 assert(Flow >= 0 && "negative jump flow");
1202 }
1203
1204
1205 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1206 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1207 for (auto &Jump : Func.Jumps) {
1210 }
1211 for (uint64_t B = 0; B < NumBlocks; B++) {
1212 auto &Block = Func.Blocks[B];
1213 Block.Flow = std::max(OutFlow[B], InFlow[B]);
1214 }
1215}
1216
1217#ifndef NDEBUG
1218
1220
1221 assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
1222 size_t NumExitBlocks = 0;
1223 for (size_t I = 1; I < Func.Blocks.size(); I++) {
1224 assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
1225 if (Func.Blocks[I].isExit())
1226 NumExitBlocks++;
1227 }
1228 assert(NumExitBlocks > 0 && "cannot find exit blocks");
1229
1230
1231 for (auto &Block : Func.Blocks) {
1232 std::unordered_set<uint64_t> UniqueSuccs;
1233 for (auto &Jump : Block.SuccJumps) {
1234 auto It = UniqueSuccs.insert(Jump->Target);
1235 assert(It.second && "input CFG contains parallel edges");
1236 }
1237 }
1238
1239 for (auto &Block : Func.Blocks) {
1241 "a block cannot be an entry and an exit");
1242 }
1243
1244 for (auto &Block : Func.Blocks) {
1246 "non-zero weight of a block w/o weight except for an entry");
1247 }
1248
1249 for (auto &Jump : Func.Jumps) {
1251 "non-zero weight of a jump w/o weight");
1252 }
1253}
1254
1255
1256void verifyOutput(const FlowFunction &Func) {
1257 const uint64_t NumBlocks = Func.Blocks.size();
1258 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1259 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1260 for (const auto &Jump : Func.Jumps) {
1263 }
1264
1267 for (uint64_t I = 0; I < NumBlocks; I++) {
1268 auto &Block = Func.Blocks[I];
1269 if (Block.isEntry()) {
1270 TotalInFlow += Block.Flow;
1271 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1272 } else if (Block.isExit()) {
1273 TotalOutFlow += Block.Flow;
1274 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1275 } else {
1276 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1277 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1278 }
1279 }
1280 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1281
1282
1283
1284
1285 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1286 for (const auto &Jump : Func.Jumps) {
1287 if (Jump.Flow > 0) {
1288 PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1289 }
1290 }
1291
1292
1293 std::queue<uint64_t> Queue;
1294 auto Visited = BitVector(NumBlocks, false);
1295 Queue.push(Func.Entry);
1296 Visited[Func.Entry] = true;
1297 while (!Queue.empty()) {
1298 uint64_t Src = Queue.front();
1299 Queue.pop();
1300 for (uint64_t Dst : PositiveFlowEdges[Src]) {
1301 if (!Visited[Dst]) {
1302 Queue.push(Dst);
1303 Visited[Dst] = true;
1304 }
1305 }
1306 }
1307
1308
1309
1310 for (uint64_t I = 0; I < NumBlocks; I++) {
1311 auto &Block = Func.Blocks[I];
1312 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1313 }
1314}
1315#endif
1316
1317}
1318
1319
1320
1322
1323 bool HasSamples = false;
1325 if (Block.Weight > 0)
1326 HasSamples = true;
1328 }
1329 for (FlowJump &Jump : Func.Jumps) {
1330 if (Jump.Weight > 0)
1331 HasSamples = true;
1333 }
1334
1335
1336 if (Func.Blocks.size() <= 1 || !HasSamples)
1337 return;
1338
1339#ifndef NDEBUG
1340
1341 verifyInput(Func);
1342#endif
1343
1344
1345 auto InferenceNetwork = MinCostMaxFlow(Params);
1346 initializeNetwork(Params, InferenceNetwork, Func);
1347 InferenceNetwork.run();
1348
1349
1350 extractWeights(Params, InferenceNetwork, Func);
1351
1352
1353 auto Adjuster = FlowAdjuster(Params, Func);
1354 Adjuster.run();
1355
1356#ifndef NDEBUG
1357
1358 verifyOutput(Func);
1359#endif
1360}
1361
1362
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
This file provides the interface for the profile inference algorithm, profi.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Target - Wrapper for Target specific information.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)
Apply the profile inference algorithm for a given function and provided profi options.
Definition SampleProfileInference.cpp:1321
A wrapper of a binary basic block.
std::vector< FlowJump * > SuccJumps
A wrapper of binary function with basic blocks and jumps.
A wrapper of a jump between two basic blocks.
Various thresholds and options controlling the behavior of the profile inference algorithm.
unsigned CostJumpUnknownFTInc
The cost of increasing an unknown fall-through jump's count by one.
unsigned CostBlockInc
The cost of increasing a block's count by one.
unsigned CostJumpFTInc
The cost of increasing a fall-through jump's count by one.
bool RebalanceUnknown
Evenly re-distribute flow among unknown subgraphs.
const int64_t CostUnlikely
The cost of taking an unlikely block/jump.
unsigned CostJumpDec
The cost of decreasing a jump's count by one.
bool JoinIslands
Join isolated components having positive flow.
unsigned CostBlockZeroInc
The cost of increasing a count of zero-weight block by one.
unsigned CostBlockEntryDec
The cost of decreasing the entry block's count by one.
unsigned CostJumpInc
The cost of increasing a jump's count by one.
unsigned CostJumpUnknownInc
The cost of increasing an unknown jump's count by one.
unsigned CostBlockUnknownInc
The cost of increasing an unknown block's count by one.
unsigned CostJumpFTDec
The cost of decreasing a fall-through jump's count by one.
unsigned CostBlockDec
The cost of decreasing a block's count by one.
unsigned CostBlockEntryInc
The cost of increasing the entry block's count by one.
bool EvenFlowDistribution
Evenly distribute flow when there are multiple equally likely options.